BanglaByT5 / trainer_state.json
Vacaspati's picture
Upload 11 files
f8c2081 verified
{
"best_metric": 1.1492022167658433e-05,
"best_model_checkpoint": "./ByT5/results_bangla_byt5_full_512/checkpoint-750000",
"epoch": 1.91562777117507,
"eval_steps": 150000,
"global_step": 1200000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0007981782379896125,
"grad_norm": 1.0358465909957886,
"learning_rate": 3e-05,
"loss": 0.0,
"step": 500
},
{
"epoch": 0.001596356475979225,
"grad_norm": 0.3616809546947479,
"learning_rate": 2.999998820092034e-05,
"loss": 0.0,
"step": 1000
},
{
"epoch": 0.0023945347139688374,
"grad_norm": 0.09030964225530624,
"learning_rate": 2.999995280369993e-05,
"loss": 0.0,
"step": 1500
},
{
"epoch": 0.00319271295195845,
"grad_norm": 0.7502796649932861,
"learning_rate": 2.9999893808394453e-05,
"loss": 0.0,
"step": 2000
},
{
"epoch": 0.003990891189948062,
"grad_norm": 407.71380615234375,
"learning_rate": 2.9999811215096716e-05,
"loss": 0.0,
"step": 2500
},
{
"epoch": 0.004789069427937675,
"grad_norm": 0.033743228763341904,
"learning_rate": 2.9999705023936664e-05,
"loss": 0.0,
"step": 3000
},
{
"epoch": 0.005587247665927287,
"grad_norm": 62.31935119628906,
"learning_rate": 2.999957523508135e-05,
"loss": 0.0,
"step": 3500
},
{
"epoch": 0.0063854259039169,
"grad_norm": 0.005816894117742777,
"learning_rate": 2.9999421848734972e-05,
"loss": 0.0,
"step": 4000
},
{
"epoch": 0.007183604141906512,
"grad_norm": 0.01169545203447342,
"learning_rate": 2.9999244865138825e-05,
"loss": 0.0,
"step": 4500
},
{
"epoch": 0.007981782379896125,
"grad_norm": 0.1192362904548645,
"learning_rate": 2.999904428457135e-05,
"loss": 0.0,
"step": 5000
},
{
"epoch": 0.008779960617885737,
"grad_norm": 0.03755159303545952,
"learning_rate": 2.99988201073481e-05,
"loss": 0.0,
"step": 5500
},
{
"epoch": 0.00957813885587535,
"grad_norm": 0.006901148706674576,
"learning_rate": 2.999857233382175e-05,
"loss": 0.0,
"step": 6000
},
{
"epoch": 0.010376317093864962,
"grad_norm": 0.005667822901159525,
"learning_rate": 2.9998300964382105e-05,
"loss": 0.0,
"step": 6500
},
{
"epoch": 0.011174495331854575,
"grad_norm": 0.009163002483546734,
"learning_rate": 2.9998005999456086e-05,
"loss": 0.0,
"step": 7000
},
{
"epoch": 0.011972673569844187,
"grad_norm": 26.250308990478516,
"learning_rate": 2.9997687439507734e-05,
"loss": 0.0,
"step": 7500
},
{
"epoch": 0.0127708518078338,
"grad_norm": 0.0081906383857131,
"learning_rate": 2.9997345285038203e-05,
"loss": 0.0,
"step": 8000
},
{
"epoch": 0.013569030045823412,
"grad_norm": 0.005343407858163118,
"learning_rate": 2.9996979536585784e-05,
"loss": 0.0,
"step": 8500
},
{
"epoch": 0.014367208283813025,
"grad_norm": 0.004830050282180309,
"learning_rate": 2.999659019472588e-05,
"loss": 0.0,
"step": 9000
},
{
"epoch": 0.015165386521802637,
"grad_norm": 0.005266611464321613,
"learning_rate": 2.9996177260070993e-05,
"loss": 0.0,
"step": 9500
},
{
"epoch": 0.01596356475979225,
"grad_norm": 0.006467476487159729,
"learning_rate": 2.999574073327077e-05,
"loss": 0.0,
"step": 10000
},
{
"epoch": 0.016761742997781864,
"grad_norm": 0.005009402055293322,
"learning_rate": 2.9995280615011947e-05,
"loss": 0.0,
"step": 10500
},
{
"epoch": 0.017559921235771474,
"grad_norm": 0.10507344454526901,
"learning_rate": 2.99947969060184e-05,
"loss": 0.0,
"step": 11000
},
{
"epoch": 0.01835809947376109,
"grad_norm": 0.004155490081757307,
"learning_rate": 2.9994289607051097e-05,
"loss": 0.0,
"step": 11500
},
{
"epoch": 0.0191562777117507,
"grad_norm": 0.005153627134859562,
"learning_rate": 2.9993758718908127e-05,
"loss": 0.0,
"step": 12000
},
{
"epoch": 0.019954455949740314,
"grad_norm": 0.006224519573152065,
"learning_rate": 2.9993204242424692e-05,
"loss": 0.0,
"step": 12500
},
{
"epoch": 0.020752634187729924,
"grad_norm": 0.006426576059311628,
"learning_rate": 2.9992626178473094e-05,
"loss": 0.0,
"step": 13000
},
{
"epoch": 0.02155081242571954,
"grad_norm": 0.004561313893646002,
"learning_rate": 2.9992024527962764e-05,
"loss": 0.0,
"step": 13500
},
{
"epoch": 0.02234899066370915,
"grad_norm": 0.004213905427604914,
"learning_rate": 2.9991399291840204e-05,
"loss": 0.0,
"step": 14000
},
{
"epoch": 0.023147168901698763,
"grad_norm": 11.763763427734375,
"learning_rate": 2.9990750471089053e-05,
"loss": 0.0,
"step": 14500
},
{
"epoch": 0.023945347139688374,
"grad_norm": 3612.91552734375,
"learning_rate": 2.9990078066730048e-05,
"loss": 0.0,
"step": 15000
},
{
"epoch": 0.02474352537767799,
"grad_norm": 0.0035761911422014236,
"learning_rate": 2.9989382079821016e-05,
"loss": 0.0,
"step": 15500
},
{
"epoch": 0.0255417036156676,
"grad_norm": 0.00994983222335577,
"learning_rate": 2.998866251145689e-05,
"loss": 0.0,
"step": 16000
},
{
"epoch": 0.026339881853657213,
"grad_norm": 1838.4853515625,
"learning_rate": 2.9987919362769707e-05,
"loss": 0.0,
"step": 16500
},
{
"epoch": 0.027138060091646824,
"grad_norm": 980.426513671875,
"learning_rate": 2.998715263492859e-05,
"loss": 0.0,
"step": 17000
},
{
"epoch": 0.02793623832963644,
"grad_norm": 0.005308611784130335,
"learning_rate": 2.9986362329139772e-05,
"loss": 0.0,
"step": 17500
},
{
"epoch": 0.02873441656762605,
"grad_norm": 0.010192105546593666,
"learning_rate": 2.9985548446646566e-05,
"loss": 0.0,
"step": 18000
},
{
"epoch": 0.029532594805615663,
"grad_norm": 235.67745971679688,
"learning_rate": 2.9984710988729377e-05,
"loss": 0.0,
"step": 18500
},
{
"epoch": 0.030330773043605274,
"grad_norm": 0.07810617983341217,
"learning_rate": 2.9983849956705706e-05,
"loss": 0.0,
"step": 19000
},
{
"epoch": 0.031128951281594888,
"grad_norm": 0.008812849409878254,
"learning_rate": 2.9982965351930143e-05,
"loss": 0.0,
"step": 19500
},
{
"epoch": 0.0319271295195845,
"grad_norm": 0.0035495434422045946,
"learning_rate": 2.9982057175794347e-05,
"loss": 0.0,
"step": 20000
},
{
"epoch": 0.03272530775757411,
"grad_norm": 59.26214599609375,
"learning_rate": 2.998112542972708e-05,
"loss": 0.0,
"step": 20500
},
{
"epoch": 0.03352348599556373,
"grad_norm": 2.114682674407959,
"learning_rate": 2.9980170115194166e-05,
"loss": 0.0,
"step": 21000
},
{
"epoch": 0.03432166423355334,
"grad_norm": 0.00623566098511219,
"learning_rate": 2.9979191233698526e-05,
"loss": 0.0,
"step": 21500
},
{
"epoch": 0.03511984247154295,
"grad_norm": 0.7668495774269104,
"learning_rate": 2.9978188786780137e-05,
"loss": 0.0,
"step": 22000
},
{
"epoch": 0.03591802070953256,
"grad_norm": 0.009474883787333965,
"learning_rate": 2.997716277601607e-05,
"loss": 0.0,
"step": 22500
},
{
"epoch": 0.03671619894752218,
"grad_norm": 0.0059985085390508175,
"learning_rate": 2.997611320302044e-05,
"loss": 0.0,
"step": 23000
},
{
"epoch": 0.03751437718551179,
"grad_norm": 0.0021645475644618273,
"learning_rate": 2.9975040069444463e-05,
"loss": 0.0,
"step": 23500
},
{
"epoch": 0.0383125554235014,
"grad_norm": 0.0018036281690001488,
"learning_rate": 2.9973943376976393e-05,
"loss": 0.0,
"step": 24000
},
{
"epoch": 0.03911073366149101,
"grad_norm": 740.995849609375,
"learning_rate": 2.9972823127341566e-05,
"loss": 0.0,
"step": 24500
},
{
"epoch": 0.03990891189948063,
"grad_norm": 0.11744941025972366,
"learning_rate": 2.9971679322302368e-05,
"loss": 0.0,
"step": 25000
},
{
"epoch": 0.04070709013747024,
"grad_norm": 0.014604251831769943,
"learning_rate": 2.997051196365824e-05,
"loss": 0.0,
"step": 25500
},
{
"epoch": 0.04150526837545985,
"grad_norm": 0.0021100931335240602,
"learning_rate": 2.996932105324569e-05,
"loss": 0.0001,
"step": 26000
},
{
"epoch": 0.042303446613449466,
"grad_norm": 747.1854858398438,
"learning_rate": 2.9968106592938267e-05,
"loss": 0.0,
"step": 26500
},
{
"epoch": 0.04310162485143908,
"grad_norm": 0.02324901707470417,
"learning_rate": 2.9966868584646574e-05,
"loss": 0.0,
"step": 27000
},
{
"epoch": 0.04389980308942869,
"grad_norm": 0.00435708649456501,
"learning_rate": 2.996560703031826e-05,
"loss": 0.0,
"step": 27500
},
{
"epoch": 0.0446979813274183,
"grad_norm": 0.002938190009444952,
"learning_rate": 2.9964321931938013e-05,
"loss": 0.0,
"step": 28000
},
{
"epoch": 0.045496159565407916,
"grad_norm": 0.0124465087428689,
"learning_rate": 2.9963013291527564e-05,
"loss": 0.0,
"step": 28500
},
{
"epoch": 0.04629433780339753,
"grad_norm": 1176.4481201171875,
"learning_rate": 2.996168111114568e-05,
"loss": 0.0,
"step": 29000
},
{
"epoch": 0.04709251604138714,
"grad_norm": 0.0027969577349722385,
"learning_rate": 2.996032539288817e-05,
"loss": 0.0,
"step": 29500
},
{
"epoch": 0.04789069427937675,
"grad_norm": 0.006387583911418915,
"learning_rate": 2.9958946138887848e-05,
"loss": 0.0,
"step": 30000
},
{
"epoch": 0.048688872517366366,
"grad_norm": 0.0018514322582632303,
"learning_rate": 2.9957543351314586e-05,
"loss": 0.0,
"step": 30500
},
{
"epoch": 0.04948705075535598,
"grad_norm": 0.002022047759965062,
"learning_rate": 2.9956117032375252e-05,
"loss": 0.0,
"step": 31000
},
{
"epoch": 0.05028522899334559,
"grad_norm": 0.0020059312228113413,
"learning_rate": 2.9954667184313755e-05,
"loss": 0.0,
"step": 31500
},
{
"epoch": 0.0510834072313352,
"grad_norm": 0.0016115437028929591,
"learning_rate": 2.995319380941101e-05,
"loss": 0.0,
"step": 32000
},
{
"epoch": 0.051881585469324816,
"grad_norm": 0.020331773906946182,
"learning_rate": 2.995169690998494e-05,
"loss": 0.0,
"step": 32500
},
{
"epoch": 0.05267976370731443,
"grad_norm": 1381.0771484375,
"learning_rate": 2.995017648839049e-05,
"loss": 0.0,
"step": 33000
},
{
"epoch": 0.05347794194530404,
"grad_norm": 235.88551330566406,
"learning_rate": 2.9948632547019604e-05,
"loss": 0.0,
"step": 33500
},
{
"epoch": 0.05427612018329365,
"grad_norm": 0.00871993601322174,
"learning_rate": 2.994706508830122e-05,
"loss": 0.0,
"step": 34000
},
{
"epoch": 0.055074298421283266,
"grad_norm": 0.036717597395181656,
"learning_rate": 2.9945474114701287e-05,
"loss": 0.0,
"step": 34500
},
{
"epoch": 0.05587247665927288,
"grad_norm": 21.093196868896484,
"learning_rate": 2.994385962872274e-05,
"loss": 0.0,
"step": 35000
},
{
"epoch": 0.05667065489726249,
"grad_norm": 0.05062306299805641,
"learning_rate": 2.99422216329055e-05,
"loss": 0.0,
"step": 35500
},
{
"epoch": 0.0574688331352521,
"grad_norm": 0.0021120295859873295,
"learning_rate": 2.9940560129826485e-05,
"loss": 0.0,
"step": 36000
},
{
"epoch": 0.058267011373241716,
"grad_norm": 0.03065457195043564,
"learning_rate": 2.993887512209959e-05,
"loss": 0.0,
"step": 36500
},
{
"epoch": 0.05906518961123133,
"grad_norm": 2.3146629333496094,
"learning_rate": 2.9937166612375685e-05,
"loss": 0.0,
"step": 37000
},
{
"epoch": 0.05986336784922094,
"grad_norm": 7.780862331390381,
"learning_rate": 2.9935434603342616e-05,
"loss": 0.0,
"step": 37500
},
{
"epoch": 0.06066154608721055,
"grad_norm": 0.0014545947778970003,
"learning_rate": 2.99336790977252e-05,
"loss": 0.0,
"step": 38000
},
{
"epoch": 0.061459724325200166,
"grad_norm": 0.6645253896713257,
"learning_rate": 2.9931900098285214e-05,
"loss": 0.0,
"step": 38500
},
{
"epoch": 0.062257902563189776,
"grad_norm": 0.0018458861159160733,
"learning_rate": 2.99300976078214e-05,
"loss": 0.0001,
"step": 39000
},
{
"epoch": 0.06305608080117939,
"grad_norm": 103.37657165527344,
"learning_rate": 2.9928271629169453e-05,
"loss": 0.0,
"step": 39500
},
{
"epoch": 0.063854259039169,
"grad_norm": 6.414219856262207,
"learning_rate": 2.9926422165202025e-05,
"loss": 0.0,
"step": 40000
},
{
"epoch": 0.06465243727715861,
"grad_norm": 0.0033696407917886972,
"learning_rate": 2.992454921882871e-05,
"loss": 0.0,
"step": 40500
},
{
"epoch": 0.06545061551514822,
"grad_norm": 0.001432748162187636,
"learning_rate": 2.9922652792996056e-05,
"loss": 0.0,
"step": 41000
},
{
"epoch": 0.06624879375313784,
"grad_norm": 0.002302892506122589,
"learning_rate": 2.9920732890687525e-05,
"loss": 0.0,
"step": 41500
},
{
"epoch": 0.06704697199112745,
"grad_norm": 0.011892186477780342,
"learning_rate": 2.9918789514923535e-05,
"loss": 0.0,
"step": 42000
},
{
"epoch": 0.06784515022911707,
"grad_norm": 0.0011868340661749244,
"learning_rate": 2.991682266876143e-05,
"loss": 0.0,
"step": 42500
},
{
"epoch": 0.06864332846710668,
"grad_norm": 0.0014516868395730853,
"learning_rate": 2.9914832355295472e-05,
"loss": 0.0,
"step": 43000
},
{
"epoch": 0.06944150670509629,
"grad_norm": 0.0018224489176645875,
"learning_rate": 2.9912818577656834e-05,
"loss": 0.0,
"step": 43500
},
{
"epoch": 0.0702396849430859,
"grad_norm": 0.002470798557624221,
"learning_rate": 2.991078133901362e-05,
"loss": 0.0,
"step": 44000
},
{
"epoch": 0.07103786318107551,
"grad_norm": 0.0014030230231583118,
"learning_rate": 2.9908720642570837e-05,
"loss": 0.0,
"step": 44500
},
{
"epoch": 0.07183604141906512,
"grad_norm": 0.002383199753239751,
"learning_rate": 2.9906636491570395e-05,
"loss": 0.0,
"step": 45000
},
{
"epoch": 0.07263421965705474,
"grad_norm": 0.0012940465239807963,
"learning_rate": 2.9904528889291094e-05,
"loss": 0.0,
"step": 45500
},
{
"epoch": 0.07343239789504435,
"grad_norm": 0.001975142164155841,
"learning_rate": 2.9902397839048644e-05,
"loss": 0.0,
"step": 46000
},
{
"epoch": 0.07423057613303397,
"grad_norm": 0.0015532145043835044,
"learning_rate": 2.990024334419563e-05,
"loss": 0.0,
"step": 46500
},
{
"epoch": 0.07502875437102358,
"grad_norm": 0.013329227454960346,
"learning_rate": 2.989806540812153e-05,
"loss": 0.0,
"step": 47000
},
{
"epoch": 0.07582693260901319,
"grad_norm": 0.0020695908460766077,
"learning_rate": 2.98958640342527e-05,
"loss": 0.0,
"step": 47500
},
{
"epoch": 0.0766251108470028,
"grad_norm": 206.78863525390625,
"learning_rate": 2.9893639226052356e-05,
"loss": 0.0,
"step": 48000
},
{
"epoch": 0.07742328908499241,
"grad_norm": 0.018143679946660995,
"learning_rate": 2.98913909870206e-05,
"loss": 0.0,
"step": 48500
},
{
"epoch": 0.07822146732298202,
"grad_norm": 0.014422932639718056,
"learning_rate": 2.988911932069438e-05,
"loss": 0.0,
"step": 49000
},
{
"epoch": 0.07901964556097164,
"grad_norm": 0.0015423446893692017,
"learning_rate": 2.98868242306475e-05,
"loss": 0.0,
"step": 49500
},
{
"epoch": 0.07981782379896125,
"grad_norm": 2.980130434036255,
"learning_rate": 2.9884505720490628e-05,
"loss": 0.0,
"step": 50000
},
{
"epoch": 0.08061600203695087,
"grad_norm": 0.006626432295888662,
"learning_rate": 2.9882163793871268e-05,
"loss": 0.0,
"step": 50500
},
{
"epoch": 0.08141418027494048,
"grad_norm": 0.0019104316597804427,
"learning_rate": 2.987979845447376e-05,
"loss": 0.0,
"step": 51000
},
{
"epoch": 0.08221235851293009,
"grad_norm": 0.006668619811534882,
"learning_rate": 2.9877409706019286e-05,
"loss": 0.0,
"step": 51500
},
{
"epoch": 0.0830105367509197,
"grad_norm": 0.001458686776459217,
"learning_rate": 2.9874997552265847e-05,
"loss": 0.0,
"step": 52000
},
{
"epoch": 0.08380871498890931,
"grad_norm": 2472.781005859375,
"learning_rate": 2.9872561997008265e-05,
"loss": 0.0,
"step": 52500
},
{
"epoch": 0.08460689322689893,
"grad_norm": 0.0012460598954930902,
"learning_rate": 2.987010304407819e-05,
"loss": 0.0,
"step": 53000
},
{
"epoch": 0.08540507146488854,
"grad_norm": 0.0019219612004235387,
"learning_rate": 2.9867620697344072e-05,
"loss": 0.0,
"step": 53500
},
{
"epoch": 0.08620324970287815,
"grad_norm": 0.002554278587922454,
"learning_rate": 2.986511496071116e-05,
"loss": 0.0,
"step": 54000
},
{
"epoch": 0.08700142794086776,
"grad_norm": 0.039569783955812454,
"learning_rate": 2.9862585838121507e-05,
"loss": 0.0,
"step": 54500
},
{
"epoch": 0.08779960617885738,
"grad_norm": 0.0019195139175280929,
"learning_rate": 2.9860033333553957e-05,
"loss": 0.0,
"step": 55000
},
{
"epoch": 0.08859778441684699,
"grad_norm": 0.002144334837794304,
"learning_rate": 2.985745745102414e-05,
"loss": 0.0,
"step": 55500
},
{
"epoch": 0.0893959626548366,
"grad_norm": 0.0011981218121945858,
"learning_rate": 2.985485819458445e-05,
"loss": 0.0,
"step": 56000
},
{
"epoch": 0.09019414089282621,
"grad_norm": 0.0018990024691447616,
"learning_rate": 2.985223556832408e-05,
"loss": 0.0,
"step": 56500
},
{
"epoch": 0.09099231913081583,
"grad_norm": 0.0011976829264312983,
"learning_rate": 2.9849589576368962e-05,
"loss": 0.0,
"step": 57000
},
{
"epoch": 0.09179049736880544,
"grad_norm": 0.776136577129364,
"learning_rate": 2.9846920222881807e-05,
"loss": 0.0,
"step": 57500
},
{
"epoch": 0.09258867560679505,
"grad_norm": 6463.4013671875,
"learning_rate": 2.984422751206206e-05,
"loss": 0.0,
"step": 58000
},
{
"epoch": 0.09338685384478466,
"grad_norm": 3361.28564453125,
"learning_rate": 2.9841511448145938e-05,
"loss": 0.0,
"step": 58500
},
{
"epoch": 0.09418503208277428,
"grad_norm": 0.0008196167182177305,
"learning_rate": 2.9838772035406367e-05,
"loss": 0.0,
"step": 59000
},
{
"epoch": 0.09498321032076389,
"grad_norm": 0.004590105731040239,
"learning_rate": 2.9836009278153024e-05,
"loss": 0.0,
"step": 59500
},
{
"epoch": 0.0957813885587535,
"grad_norm": 0.002218448556959629,
"learning_rate": 2.9833223180732315e-05,
"loss": 0.0,
"step": 60000
},
{
"epoch": 0.09657956679674311,
"grad_norm": 0.0009515349520370364,
"learning_rate": 2.9830413747527352e-05,
"loss": 0.0,
"step": 60500
},
{
"epoch": 0.09737774503473273,
"grad_norm": 0.0013340599834918976,
"learning_rate": 2.982758098295796e-05,
"loss": 0.0,
"step": 61000
},
{
"epoch": 0.09817592327272234,
"grad_norm": 0.0008279486792162061,
"learning_rate": 2.9824724891480688e-05,
"loss": 0.0,
"step": 61500
},
{
"epoch": 0.09897410151071195,
"grad_norm": 0.002933940151706338,
"learning_rate": 2.9821845477588752e-05,
"loss": 0.0,
"step": 62000
},
{
"epoch": 0.09977227974870156,
"grad_norm": 0.0022352703381329775,
"learning_rate": 2.98189427458121e-05,
"loss": 0.0,
"step": 62500
},
{
"epoch": 0.10057045798669118,
"grad_norm": 0.0006310699391178787,
"learning_rate": 2.9816016700717314e-05,
"loss": 0.0,
"step": 63000
},
{
"epoch": 0.10136863622468079,
"grad_norm": 0.0008447402506135404,
"learning_rate": 2.9813067346907694e-05,
"loss": 0.0,
"step": 63500
},
{
"epoch": 0.1021668144626704,
"grad_norm": 0.001062876428477466,
"learning_rate": 2.9810094689023198e-05,
"loss": 0.0,
"step": 64000
},
{
"epoch": 0.10296499270066001,
"grad_norm": 0.0022323522716760635,
"learning_rate": 2.9807098731740432e-05,
"loss": 0.0,
"step": 64500
},
{
"epoch": 0.10376317093864963,
"grad_norm": 0.0006886335904709995,
"learning_rate": 2.980407947977268e-05,
"loss": 0.0,
"step": 65000
},
{
"epoch": 0.10456134917663924,
"grad_norm": 0.001781968749128282,
"learning_rate": 2.9801036937869845e-05,
"loss": 0.0,
"step": 65500
},
{
"epoch": 0.10535952741462885,
"grad_norm": 0.0010141967795789242,
"learning_rate": 2.9797971110818502e-05,
"loss": 0.0,
"step": 66000
},
{
"epoch": 0.10615770565261846,
"grad_norm": 0.0032014320604503155,
"learning_rate": 2.979488200344184e-05,
"loss": 0.0,
"step": 66500
},
{
"epoch": 0.10695588389060807,
"grad_norm": 766.4085693359375,
"learning_rate": 2.9791769620599665e-05,
"loss": 0.0,
"step": 67000
},
{
"epoch": 0.10775406212859769,
"grad_norm": 0.0012145901564508677,
"learning_rate": 2.978863396718842e-05,
"loss": 0.0,
"step": 67500
},
{
"epoch": 0.1085522403665873,
"grad_norm": 0.005595668219029903,
"learning_rate": 2.9785475048141148e-05,
"loss": 0.0,
"step": 68000
},
{
"epoch": 0.10935041860457691,
"grad_norm": 0.0015825449954718351,
"learning_rate": 2.9782292868427488e-05,
"loss": 0.0,
"step": 68500
},
{
"epoch": 0.11014859684256653,
"grad_norm": 0.00758874136954546,
"learning_rate": 2.9779087433053687e-05,
"loss": 0.0,
"step": 69000
},
{
"epoch": 0.11094677508055614,
"grad_norm": 1.4108655452728271,
"learning_rate": 2.9775858747062564e-05,
"loss": 0.0,
"step": 69500
},
{
"epoch": 0.11174495331854575,
"grad_norm": 0.0010229300241917372,
"learning_rate": 2.9772606815533523e-05,
"loss": 0.0,
"step": 70000
},
{
"epoch": 0.11254313155653536,
"grad_norm": 0.12247798591852188,
"learning_rate": 2.9769331643582543e-05,
"loss": 0.0,
"step": 70500
},
{
"epoch": 0.11334130979452497,
"grad_norm": 0.0008451018366031349,
"learning_rate": 2.9766033236362148e-05,
"loss": 0.0,
"step": 71000
},
{
"epoch": 0.11413948803251459,
"grad_norm": 0.000856923230458051,
"learning_rate": 2.9762711599061435e-05,
"loss": 0.0,
"step": 71500
},
{
"epoch": 0.1149376662705042,
"grad_norm": 0.0016039250185713172,
"learning_rate": 2.9759366736906045e-05,
"loss": 0.0,
"step": 72000
},
{
"epoch": 0.11573584450849381,
"grad_norm": 40.93400955200195,
"learning_rate": 2.9755998655158137e-05,
"loss": 0.0,
"step": 72500
},
{
"epoch": 0.11653402274648343,
"grad_norm": 0.000588159600738436,
"learning_rate": 2.9752607359116423e-05,
"loss": 0.0,
"step": 73000
},
{
"epoch": 0.11733220098447304,
"grad_norm": 127.08687591552734,
"learning_rate": 2.974919285411612e-05,
"loss": 0.0,
"step": 73500
},
{
"epoch": 0.11813037922246265,
"grad_norm": 0.001267329789698124,
"learning_rate": 2.9745755145528964e-05,
"loss": 0.0,
"step": 74000
},
{
"epoch": 0.11892855746045226,
"grad_norm": 0.0433398075401783,
"learning_rate": 2.97422942387632e-05,
"loss": 0.0,
"step": 74500
},
{
"epoch": 0.11972673569844187,
"grad_norm": 0.02973037399351597,
"learning_rate": 2.973881013926356e-05,
"loss": 0.0,
"step": 75000
},
{
"epoch": 0.12052491393643149,
"grad_norm": 0.0013027731329202652,
"learning_rate": 2.9735302852511267e-05,
"loss": 0.0,
"step": 75500
},
{
"epoch": 0.1213230921744211,
"grad_norm": 0.018169229850172997,
"learning_rate": 2.9731772384024015e-05,
"loss": 0.0,
"step": 76000
},
{
"epoch": 0.12212127041241072,
"grad_norm": 0.0007211797637864947,
"learning_rate": 2.9728218739355988e-05,
"loss": 0.0,
"step": 76500
},
{
"epoch": 0.12291944865040033,
"grad_norm": 22.20551109313965,
"learning_rate": 2.972464192409781e-05,
"loss": 0.0,
"step": 77000
},
{
"epoch": 0.12371762688838994,
"grad_norm": 0.001930927624925971,
"learning_rate": 2.972104194387656e-05,
"loss": 0.0,
"step": 77500
},
{
"epoch": 0.12451580512637955,
"grad_norm": 0.0008476130315102637,
"learning_rate": 2.9717418804355775e-05,
"loss": 0.0,
"step": 78000
},
{
"epoch": 0.12531398336436916,
"grad_norm": 0.016626249998807907,
"learning_rate": 2.9713772511235406e-05,
"loss": 0.0,
"step": 78500
},
{
"epoch": 0.12611216160235877,
"grad_norm": 0.001898073242045939,
"learning_rate": 2.971010307025185e-05,
"loss": 0.0,
"step": 79000
},
{
"epoch": 0.12691033984034839,
"grad_norm": 0.001200891681946814,
"learning_rate": 2.9706410487177906e-05,
"loss": 0.0,
"step": 79500
},
{
"epoch": 0.127708518078338,
"grad_norm": 0.009901273995637894,
"learning_rate": 2.970269476782278e-05,
"loss": 0.0,
"step": 80000
},
{
"epoch": 0.1285066963163276,
"grad_norm": 0.013532022945582867,
"learning_rate": 2.969895591803209e-05,
"loss": 0.0,
"step": 80500
},
{
"epoch": 0.12930487455431722,
"grad_norm": 0.0005334099405445158,
"learning_rate": 2.9695193943687834e-05,
"loss": 0.0,
"step": 81000
},
{
"epoch": 0.13010305279230683,
"grad_norm": 0.003070188919082284,
"learning_rate": 2.9691408850708383e-05,
"loss": 0.0,
"step": 81500
},
{
"epoch": 0.13090123103029644,
"grad_norm": 0.0006594851147383451,
"learning_rate": 2.9687600645048488e-05,
"loss": 0.0,
"step": 82000
},
{
"epoch": 0.13169940926828608,
"grad_norm": 0.0006931771640665829,
"learning_rate": 2.9683769332699262e-05,
"loss": 0.0,
"step": 82500
},
{
"epoch": 0.1324975875062757,
"grad_norm": 0.0007057678885757923,
"learning_rate": 2.967991491968816e-05,
"loss": 0.0,
"step": 83000
},
{
"epoch": 0.1332957657442653,
"grad_norm": 0.022385986521840096,
"learning_rate": 2.967603741207899e-05,
"loss": 0.0,
"step": 83500
},
{
"epoch": 0.1340939439822549,
"grad_norm": 0.00055215775500983,
"learning_rate": 2.9672136815971892e-05,
"loss": 0.0,
"step": 84000
},
{
"epoch": 0.13489212222024452,
"grad_norm": 0.041440509259700775,
"learning_rate": 2.9668213137503318e-05,
"loss": 0.0,
"step": 84500
},
{
"epoch": 0.13569030045823413,
"grad_norm": 0.0025042875204235315,
"learning_rate": 2.966426638284604e-05,
"loss": 0.0,
"step": 85000
},
{
"epoch": 0.13648847869622374,
"grad_norm": 0.0006895024562254548,
"learning_rate": 2.9660296558209146e-05,
"loss": 0.0,
"step": 85500
},
{
"epoch": 0.13728665693421335,
"grad_norm": 0.0008338299230672419,
"learning_rate": 2.9656303669837992e-05,
"loss": 0.0,
"step": 86000
},
{
"epoch": 0.13808483517220296,
"grad_norm": 0.0018846240127459168,
"learning_rate": 2.965228772401424e-05,
"loss": 0.0,
"step": 86500
},
{
"epoch": 0.13888301341019257,
"grad_norm": 0.0004423022910486907,
"learning_rate": 2.9648248727055812e-05,
"loss": 0.0,
"step": 87000
},
{
"epoch": 0.13968119164818218,
"grad_norm": 0.0012980562169104815,
"learning_rate": 2.964418668531691e-05,
"loss": 0.0,
"step": 87500
},
{
"epoch": 0.1404793698861718,
"grad_norm": 0.0038005076348781586,
"learning_rate": 2.964010160518798e-05,
"loss": 0.0,
"step": 88000
},
{
"epoch": 0.1412775481241614,
"grad_norm": 0.0011083179851993918,
"learning_rate": 2.9635993493095707e-05,
"loss": 0.0,
"step": 88500
},
{
"epoch": 0.14207572636215102,
"grad_norm": 0.28431451320648193,
"learning_rate": 2.963186235550302e-05,
"loss": 0.0,
"step": 89000
},
{
"epoch": 0.14287390460014063,
"grad_norm": 0.0007884473307058215,
"learning_rate": 2.962770819890907e-05,
"loss": 0.0,
"step": 89500
},
{
"epoch": 0.14367208283813024,
"grad_norm": 0.0057783485390245914,
"learning_rate": 2.9623531029849214e-05,
"loss": 0.0,
"step": 90000
},
{
"epoch": 0.14447026107611988,
"grad_norm": 9626.4208984375,
"learning_rate": 2.961933085489503e-05,
"loss": 0.0,
"step": 90500
},
{
"epoch": 0.1452684393141095,
"grad_norm": 0.06994491070508957,
"learning_rate": 2.961510768065427e-05,
"loss": 0.0,
"step": 91000
},
{
"epoch": 0.1460666175520991,
"grad_norm": 0.0010994599433615804,
"learning_rate": 2.9610861513770875e-05,
"loss": 0.0,
"step": 91500
},
{
"epoch": 0.1468647957900887,
"grad_norm": 0.0005376485059969127,
"learning_rate": 2.9606592360924967e-05,
"loss": 0.0,
"step": 92000
},
{
"epoch": 0.14766297402807832,
"grad_norm": 0.005438265856355429,
"learning_rate": 2.9602300228832815e-05,
"loss": 0.0,
"step": 92500
},
{
"epoch": 0.14846115226606793,
"grad_norm": 0.002110505709424615,
"learning_rate": 2.9597985124246854e-05,
"loss": 0.0,
"step": 93000
},
{
"epoch": 0.14925933050405754,
"grad_norm": 0.0004619982501026243,
"learning_rate": 2.959364705395565e-05,
"loss": 0.0,
"step": 93500
},
{
"epoch": 0.15005750874204715,
"grad_norm": 0.0009718042565509677,
"learning_rate": 2.958928602478389e-05,
"loss": 0.0,
"step": 94000
},
{
"epoch": 0.15085568698003676,
"grad_norm": 0.004765130113810301,
"learning_rate": 2.9584902043592412e-05,
"loss": 0.0,
"step": 94500
},
{
"epoch": 0.15165386521802637,
"grad_norm": 0.19533102214336395,
"learning_rate": 2.9580495117278124e-05,
"loss": 0.0,
"step": 95000
},
{
"epoch": 0.15245204345601598,
"grad_norm": 1262.41259765625,
"learning_rate": 2.9576065252774063e-05,
"loss": 0.0,
"step": 95500
},
{
"epoch": 0.1532502216940056,
"grad_norm": 0.001484275097027421,
"learning_rate": 2.957161245704933e-05,
"loss": 0.0,
"step": 96000
},
{
"epoch": 0.1540483999319952,
"grad_norm": 0.0024847572203725576,
"learning_rate": 2.9567136737109106e-05,
"loss": 0.0,
"step": 96500
},
{
"epoch": 0.15484657816998482,
"grad_norm": 0.0015979851596057415,
"learning_rate": 2.9562638099994656e-05,
"loss": 0.0,
"step": 97000
},
{
"epoch": 0.15564475640797443,
"grad_norm": 0.005389617756009102,
"learning_rate": 2.9558116552783274e-05,
"loss": 0.0,
"step": 97500
},
{
"epoch": 0.15644293464596404,
"grad_norm": 0.0015855624806135893,
"learning_rate": 2.9553572102588305e-05,
"loss": 0.0,
"step": 98000
},
{
"epoch": 0.15724111288395368,
"grad_norm": 0.0018055408727377653,
"learning_rate": 2.954900475655913e-05,
"loss": 0.0,
"step": 98500
},
{
"epoch": 0.1580392911219433,
"grad_norm": 0.0006732465699315071,
"learning_rate": 2.954441452188115e-05,
"loss": 0.0,
"step": 99000
},
{
"epoch": 0.1588374693599329,
"grad_norm": 0.0014587478945031762,
"learning_rate": 2.953980140577576e-05,
"loss": 0.0,
"step": 99500
},
{
"epoch": 0.1596356475979225,
"grad_norm": 0.1299201250076294,
"learning_rate": 2.953516541550037e-05,
"loss": 0.0,
"step": 100000
},
{
"epoch": 0.16043382583591212,
"grad_norm": 0.0025660579558461905,
"learning_rate": 2.9530506558348375e-05,
"loss": 0.0,
"step": 100500
},
{
"epoch": 0.16123200407390173,
"grad_norm": 0.0009547541849315166,
"learning_rate": 2.952582484164912e-05,
"loss": 0.0,
"step": 101000
},
{
"epoch": 0.16203018231189134,
"grad_norm": 0.000839733867906034,
"learning_rate": 2.952112027276796e-05,
"loss": 0.0,
"step": 101500
},
{
"epoch": 0.16282836054988095,
"grad_norm": 0.0009094159468077123,
"learning_rate": 2.9516392859106144e-05,
"loss": 0.0,
"step": 102000
},
{
"epoch": 0.16362653878787056,
"grad_norm": 0.0008817181806080043,
"learning_rate": 2.9511642608100906e-05,
"loss": 0.0,
"step": 102500
},
{
"epoch": 0.16442471702586017,
"grad_norm": 0.0047252182848751545,
"learning_rate": 2.9506869527225387e-05,
"loss": 0.0,
"step": 103000
},
{
"epoch": 0.16522289526384978,
"grad_norm": 0.000495503016281873,
"learning_rate": 2.9502073623988646e-05,
"loss": 0.0,
"step": 103500
},
{
"epoch": 0.1660210735018394,
"grad_norm": 0.0009167164098471403,
"learning_rate": 2.9497254905935656e-05,
"loss": 0.0,
"step": 104000
},
{
"epoch": 0.166819251739829,
"grad_norm": 0.0006542898481711745,
"learning_rate": 2.949241338064727e-05,
"loss": 0.0,
"step": 104500
},
{
"epoch": 0.16761742997781862,
"grad_norm": 0.008757367730140686,
"learning_rate": 2.948754905574023e-05,
"loss": 0.0,
"step": 105000
},
{
"epoch": 0.16841560821580823,
"grad_norm": 0.0005742495995946229,
"learning_rate": 2.9482661938867136e-05,
"loss": 0.0,
"step": 105500
},
{
"epoch": 0.16921378645379787,
"grad_norm": 0.0007464766968041658,
"learning_rate": 2.947775203771646e-05,
"loss": 0.0,
"step": 106000
},
{
"epoch": 0.17001196469178748,
"grad_norm": 0.0006082553300075233,
"learning_rate": 2.947281936001251e-05,
"loss": 0.0,
"step": 106500
},
{
"epoch": 0.1708101429297771,
"grad_norm": 0.0015842883149161935,
"learning_rate": 2.9467863913515423e-05,
"loss": 0.0,
"step": 107000
},
{
"epoch": 0.1716083211677667,
"grad_norm": 0.00969216413795948,
"learning_rate": 2.9462885706021167e-05,
"loss": 0.0,
"step": 107500
},
{
"epoch": 0.1724064994057563,
"grad_norm": 0.000725767866242677,
"learning_rate": 2.94578847453615e-05,
"loss": 0.0,
"step": 108000
},
{
"epoch": 0.17320467764374592,
"grad_norm": 0.0005956355598755181,
"learning_rate": 2.9452861039403994e-05,
"loss": 0.0,
"step": 108500
},
{
"epoch": 0.17400285588173553,
"grad_norm": 0.0028481916524469852,
"learning_rate": 2.9447814596051997e-05,
"loss": 0.0,
"step": 109000
},
{
"epoch": 0.17480103411972514,
"grad_norm": 0.0006914939149282873,
"learning_rate": 2.9442745423244625e-05,
"loss": 0.0,
"step": 109500
},
{
"epoch": 0.17559921235771475,
"grad_norm": 0.004076390527188778,
"learning_rate": 2.9437653528956757e-05,
"loss": 0.0,
"step": 110000
},
{
"epoch": 0.17639739059570436,
"grad_norm": 0.0007233788492158055,
"learning_rate": 2.943253892119901e-05,
"loss": 0.0,
"step": 110500
},
{
"epoch": 0.17719556883369397,
"grad_norm": 0.0004554203769657761,
"learning_rate": 2.9427401608017744e-05,
"loss": 0.0,
"step": 111000
},
{
"epoch": 0.17799374707168358,
"grad_norm": 0.011892233975231647,
"learning_rate": 2.9422241597495035e-05,
"loss": 0.0,
"step": 111500
},
{
"epoch": 0.1787919253096732,
"grad_norm": 0.0005836607306264341,
"learning_rate": 2.9417058897748664e-05,
"loss": 0.0,
"step": 112000
},
{
"epoch": 0.1795901035476628,
"grad_norm": 0.0005727341049350798,
"learning_rate": 2.941185351693211e-05,
"loss": 0.0,
"step": 112500
},
{
"epoch": 0.18038828178565242,
"grad_norm": 0.0004444452642928809,
"learning_rate": 2.9406625463234532e-05,
"loss": 0.0,
"step": 113000
},
{
"epoch": 0.18118646002364203,
"grad_norm": 0.0015697794733569026,
"learning_rate": 2.940137474488076e-05,
"loss": 0.0,
"step": 113500
},
{
"epoch": 0.18198463826163166,
"grad_norm": 0.0027179263997823,
"learning_rate": 2.9396101370131284e-05,
"loss": 0.0,
"step": 114000
},
{
"epoch": 0.18278281649962128,
"grad_norm": 190.7173309326172,
"learning_rate": 2.9390805347282225e-05,
"loss": 0.0,
"step": 114500
},
{
"epoch": 0.1835809947376109,
"grad_norm": 0.00045203749323263764,
"learning_rate": 2.938548668466535e-05,
"loss": 0.0,
"step": 115000
},
{
"epoch": 0.1843791729756005,
"grad_norm": 0.0028070039115846157,
"learning_rate": 2.938014539064803e-05,
"loss": 0.0,
"step": 115500
},
{
"epoch": 0.1851773512135901,
"grad_norm": 0.0007502553053200245,
"learning_rate": 2.9374781473633255e-05,
"loss": 0.0,
"step": 116000
},
{
"epoch": 0.18597552945157972,
"grad_norm": 0.0008750234264880419,
"learning_rate": 2.9369394942059582e-05,
"loss": 0.0,
"step": 116500
},
{
"epoch": 0.18677370768956933,
"grad_norm": 0.0004678396799135953,
"learning_rate": 2.9363985804401174e-05,
"loss": 0.0,
"step": 117000
},
{
"epoch": 0.18757188592755894,
"grad_norm": 0.0006737300427630544,
"learning_rate": 2.9358554069167733e-05,
"loss": 0.0,
"step": 117500
},
{
"epoch": 0.18837006416554855,
"grad_norm": 0.001020797179080546,
"learning_rate": 2.9353099744904527e-05,
"loss": 0.0,
"step": 118000
},
{
"epoch": 0.18916824240353816,
"grad_norm": 0.0005353185697458684,
"learning_rate": 2.9347622840192353e-05,
"loss": 0.0,
"step": 118500
},
{
"epoch": 0.18996642064152777,
"grad_norm": 0.0031409678049385548,
"learning_rate": 2.9342123363647542e-05,
"loss": 0.0,
"step": 119000
},
{
"epoch": 0.19076459887951738,
"grad_norm": 0.0005457552615553141,
"learning_rate": 2.933660132392193e-05,
"loss": 0.0,
"step": 119500
},
{
"epoch": 0.191562777117507,
"grad_norm": 0.007242423016577959,
"learning_rate": 2.933105672970284e-05,
"loss": 0.0,
"step": 120000
},
{
"epoch": 0.1923609553554966,
"grad_norm": 0.0015077221905812621,
"learning_rate": 2.9325489589713092e-05,
"loss": 0.0,
"step": 120500
},
{
"epoch": 0.19315913359348622,
"grad_norm": 0.000620639999397099,
"learning_rate": 2.9319899912710968e-05,
"loss": 0.0,
"step": 121000
},
{
"epoch": 0.19395731183147583,
"grad_norm": 0.01805310882627964,
"learning_rate": 2.9314287707490208e-05,
"loss": 0.0,
"step": 121500
},
{
"epoch": 0.19475549006946546,
"grad_norm": 13.3745698928833,
"learning_rate": 2.9308652982879998e-05,
"loss": 0.0,
"step": 122000
},
{
"epoch": 0.19555366830745508,
"grad_norm": 0.0002797323395498097,
"learning_rate": 2.9302995747744935e-05,
"loss": 0.0,
"step": 122500
},
{
"epoch": 0.19635184654544469,
"grad_norm": 0.0012682373635470867,
"learning_rate": 2.929731601098505e-05,
"loss": 0.0,
"step": 123000
},
{
"epoch": 0.1971500247834343,
"grad_norm": 0.1774342805147171,
"learning_rate": 2.9291613781535764e-05,
"loss": 0.0,
"step": 123500
},
{
"epoch": 0.1979482030214239,
"grad_norm": 0.0004860001499764621,
"learning_rate": 2.928588906836788e-05,
"loss": 0.0,
"step": 124000
},
{
"epoch": 0.19874638125941352,
"grad_norm": 0.0010263145668432117,
"learning_rate": 2.9280141880487584e-05,
"loss": 0.0,
"step": 124500
},
{
"epoch": 0.19954455949740313,
"grad_norm": 0.0008300538174808025,
"learning_rate": 2.9274372226936416e-05,
"loss": 0.0,
"step": 125000
},
{
"epoch": 0.20034273773539274,
"grad_norm": 0.0008429441950283945,
"learning_rate": 2.9268580116791246e-05,
"loss": 0.0,
"step": 125500
},
{
"epoch": 0.20114091597338235,
"grad_norm": 0.000568005139939487,
"learning_rate": 2.926276555916429e-05,
"loss": 0.0,
"step": 126000
},
{
"epoch": 0.20193909421137196,
"grad_norm": 0.00045849403250031173,
"learning_rate": 2.9256928563203063e-05,
"loss": 0.0,
"step": 126500
},
{
"epoch": 0.20273727244936157,
"grad_norm": 0.001164284534752369,
"learning_rate": 2.9251069138090403e-05,
"loss": 0.0,
"step": 127000
},
{
"epoch": 0.20353545068735118,
"grad_norm": 20.4660587310791,
"learning_rate": 2.924518729304441e-05,
"loss": 0.0,
"step": 127500
},
{
"epoch": 0.2043336289253408,
"grad_norm": 0.00039680031477473676,
"learning_rate": 2.9239283037318466e-05,
"loss": 0.0,
"step": 128000
},
{
"epoch": 0.2051318071633304,
"grad_norm": 0.0023022566456347704,
"learning_rate": 2.9233356380201214e-05,
"loss": 0.0,
"step": 128500
},
{
"epoch": 0.20592998540132001,
"grad_norm": 0.000906547240447253,
"learning_rate": 2.9227407331016532e-05,
"loss": 0.0,
"step": 129000
},
{
"epoch": 0.20672816363930965,
"grad_norm": 0.0004909643321298063,
"learning_rate": 2.9221435899123522e-05,
"loss": 0.0,
"step": 129500
},
{
"epoch": 0.20752634187729926,
"grad_norm": 0.0005696564330719411,
"learning_rate": 2.921544209391651e-05,
"loss": 0.0,
"step": 130000
},
{
"epoch": 0.20832452011528887,
"grad_norm": 0.0012174558360129595,
"learning_rate": 2.920942592482501e-05,
"loss": 0.0,
"step": 130500
},
{
"epoch": 0.20912269835327849,
"grad_norm": 0.0008444120176136494,
"learning_rate": 2.920338740131373e-05,
"loss": 0.0,
"step": 131000
},
{
"epoch": 0.2099208765912681,
"grad_norm": 19.62136459350586,
"learning_rate": 2.919732653288253e-05,
"loss": 0.0,
"step": 131500
},
{
"epoch": 0.2107190548292577,
"grad_norm": 0.0005330504500307143,
"learning_rate": 2.919124332906644e-05,
"loss": 0.0,
"step": 132000
},
{
"epoch": 0.21151723306724732,
"grad_norm": 0.0009033152018673718,
"learning_rate": 2.9185137799435615e-05,
"loss": 0.0,
"step": 132500
},
{
"epoch": 0.21231541130523693,
"grad_norm": 0.000573009136132896,
"learning_rate": 2.9179009953595344e-05,
"loss": 0.0,
"step": 133000
},
{
"epoch": 0.21311358954322654,
"grad_norm": 0.0006213324377313256,
"learning_rate": 2.9172859801186013e-05,
"loss": 0.0,
"step": 133500
},
{
"epoch": 0.21391176778121615,
"grad_norm": 0.0007363075274042785,
"learning_rate": 2.916668735188312e-05,
"loss": 0.0,
"step": 134000
},
{
"epoch": 0.21470994601920576,
"grad_norm": 0.0005618541617877781,
"learning_rate": 2.916049261539721e-05,
"loss": 0.0,
"step": 134500
},
{
"epoch": 0.21550812425719537,
"grad_norm": 0.006180692929774523,
"learning_rate": 2.9154275601473923e-05,
"loss": 0.0,
"step": 135000
},
{
"epoch": 0.21630630249518498,
"grad_norm": 0.0006301426910795271,
"learning_rate": 2.914803631989392e-05,
"loss": 0.0,
"step": 135500
},
{
"epoch": 0.2171044807331746,
"grad_norm": 0.0004460285708773881,
"learning_rate": 2.9141774780472914e-05,
"loss": 0.0,
"step": 136000
},
{
"epoch": 0.2179026589711642,
"grad_norm": 0.0005381643422879279,
"learning_rate": 2.9135490993061626e-05,
"loss": 0.0,
"step": 136500
},
{
"epoch": 0.21870083720915381,
"grad_norm": 105.94903564453125,
"learning_rate": 2.9129184967545768e-05,
"loss": 0.0,
"step": 137000
},
{
"epoch": 0.21949901544714345,
"grad_norm": 0.0006671809242106974,
"learning_rate": 2.9122856713846047e-05,
"loss": 0.0,
"step": 137500
},
{
"epoch": 0.22029719368513306,
"grad_norm": 0.0009246356203220785,
"learning_rate": 2.911650624191815e-05,
"loss": 0.0,
"step": 138000
},
{
"epoch": 0.22109537192312267,
"grad_norm": 0.000820090644992888,
"learning_rate": 2.9110133561752703e-05,
"loss": 0.0,
"step": 138500
},
{
"epoch": 0.22189355016111229,
"grad_norm": 3.0545477867126465,
"learning_rate": 2.9103738683375266e-05,
"loss": 0.0,
"step": 139000
},
{
"epoch": 0.2226917283991019,
"grad_norm": 0.0010678736725822091,
"learning_rate": 2.9097321616846334e-05,
"loss": 0.0,
"step": 139500
},
{
"epoch": 0.2234899066370915,
"grad_norm": 0.003939803224056959,
"learning_rate": 2.9090882372261308e-05,
"loss": 0.0,
"step": 140000
},
{
"epoch": 0.22428808487508112,
"grad_norm": 0.0022800497245043516,
"learning_rate": 2.908442095975047e-05,
"loss": 0.0,
"step": 140500
},
{
"epoch": 0.22508626311307073,
"grad_norm": 0.000471117120468989,
"learning_rate": 2.907793738947899e-05,
"loss": 0.0,
"step": 141000
},
{
"epoch": 0.22588444135106034,
"grad_norm": 135.74588012695312,
"learning_rate": 2.9071431671646884e-05,
"loss": 0.0,
"step": 141500
},
{
"epoch": 0.22668261958904995,
"grad_norm": 0.00035868247505277395,
"learning_rate": 2.9064903816489015e-05,
"loss": 0.0,
"step": 142000
},
{
"epoch": 0.22748079782703956,
"grad_norm": 0.004526620730757713,
"learning_rate": 2.905835383427508e-05,
"loss": 0.0,
"step": 142500
},
{
"epoch": 0.22827897606502917,
"grad_norm": 0.01911444030702114,
"learning_rate": 2.9051781735309576e-05,
"loss": 0.0,
"step": 143000
},
{
"epoch": 0.22907715430301878,
"grad_norm": 3154.1181640625,
"learning_rate": 2.9045187529931803e-05,
"loss": 0.0,
"step": 143500
},
{
"epoch": 0.2298753325410084,
"grad_norm": 0.00026835728203877807,
"learning_rate": 2.903857122851583e-05,
"loss": 0.0,
"step": 144000
},
{
"epoch": 0.230673510778998,
"grad_norm": 523.6666259765625,
"learning_rate": 2.9031932841470495e-05,
"loss": 0.0,
"step": 144500
},
{
"epoch": 0.23147168901698761,
"grad_norm": 0.002406003652140498,
"learning_rate": 2.9025272379239383e-05,
"loss": 0.0,
"step": 145000
},
{
"epoch": 0.23226986725497725,
"grad_norm": 0.0015471165534108877,
"learning_rate": 2.9018589852300794e-05,
"loss": 0.0,
"step": 145500
},
{
"epoch": 0.23306804549296686,
"grad_norm": 0.004399681463837624,
"learning_rate": 2.901188527116776e-05,
"loss": 0.0,
"step": 146000
},
{
"epoch": 0.23386622373095647,
"grad_norm": 15.444055557250977,
"learning_rate": 2.9005158646387993e-05,
"loss": 0.0,
"step": 146500
},
{
"epoch": 0.23466440196894608,
"grad_norm": 0.018915316089987755,
"learning_rate": 2.8998409988543897e-05,
"loss": 0.0,
"step": 147000
},
{
"epoch": 0.2354625802069357,
"grad_norm": 57.25000762939453,
"learning_rate": 2.8991639308252527e-05,
"loss": 0.0,
"step": 147500
},
{
"epoch": 0.2362607584449253,
"grad_norm": 0.0022897564340382814,
"learning_rate": 2.8984846616165586e-05,
"loss": 0.0,
"step": 148000
},
{
"epoch": 0.23705893668291492,
"grad_norm": 0.00042616488644853234,
"learning_rate": 2.8978031922969418e-05,
"loss": 0.0,
"step": 148500
},
{
"epoch": 0.23785711492090453,
"grad_norm": 0.01706228218972683,
"learning_rate": 2.8971195239384966e-05,
"loss": 0.0,
"step": 149000
},
{
"epoch": 0.23865529315889414,
"grad_norm": 0.0007037579198367894,
"learning_rate": 2.896433657616777e-05,
"loss": 0.0,
"step": 149500
},
{
"epoch": 0.23945347139688375,
"grad_norm": 0.0006432163645513356,
"learning_rate": 2.8957455944107963e-05,
"loss": 0.0,
"step": 150000
},
{
"epoch": 0.23945347139688375,
"eval_loss": 2.1141684555914253e-05,
"eval_runtime": 21663.2369,
"eval_samples_per_second": 102.814,
"eval_steps_per_second": 3.213,
"step": 150000
},
{
"epoch": 0.24025164963487336,
"grad_norm": 0.000818128464743495,
"learning_rate": 2.8950553354030216e-05,
"loss": 0.0,
"step": 150500
},
{
"epoch": 0.24104982787286297,
"grad_norm": 186.0622100830078,
"learning_rate": 2.894362881679376e-05,
"loss": 0.0,
"step": 151000
},
{
"epoch": 0.24184800611085258,
"grad_norm": 1975.7821044921875,
"learning_rate": 2.893668234329236e-05,
"loss": 0.0,
"step": 151500
},
{
"epoch": 0.2426461843488422,
"grad_norm": 0.00043766028829850256,
"learning_rate": 2.892971394445427e-05,
"loss": 0.0,
"step": 152000
},
{
"epoch": 0.2434443625868318,
"grad_norm": 0.00039182481123134494,
"learning_rate": 2.8922723631242254e-05,
"loss": 0.0,
"step": 152500
},
{
"epoch": 0.24424254082482144,
"grad_norm": 0.0010972399031743407,
"learning_rate": 2.8915711414653543e-05,
"loss": 0.0,
"step": 153000
},
{
"epoch": 0.24504071906281105,
"grad_norm": 0.05904774367809296,
"learning_rate": 2.8908677305719836e-05,
"loss": 0.0,
"step": 153500
},
{
"epoch": 0.24583889730080066,
"grad_norm": 0.019364451989531517,
"learning_rate": 2.890162131550727e-05,
"loss": 0.0,
"step": 154000
},
{
"epoch": 0.24663707553879027,
"grad_norm": 0.000549559888895601,
"learning_rate": 2.8894543455116397e-05,
"loss": 0.0,
"step": 154500
},
{
"epoch": 0.24743525377677988,
"grad_norm": 0.00035546591971069574,
"learning_rate": 2.888744373568218e-05,
"loss": 0.0,
"step": 155000
},
{
"epoch": 0.2482334320147695,
"grad_norm": 0.0004987181746400893,
"learning_rate": 2.8880322168373987e-05,
"loss": 0.0,
"step": 155500
},
{
"epoch": 0.2490316102527591,
"grad_norm": 0.00044386033550836146,
"learning_rate": 2.887317876439553e-05,
"loss": 0.0,
"step": 156000
},
{
"epoch": 0.24982978849074872,
"grad_norm": 7.740879535675049,
"learning_rate": 2.886601353498489e-05,
"loss": 0.0,
"step": 156500
},
{
"epoch": 0.2506279667287383,
"grad_norm": 0.0005541268619708717,
"learning_rate": 2.8858826491414486e-05,
"loss": 0.0,
"step": 157000
},
{
"epoch": 0.25142614496672794,
"grad_norm": 0.0003068426449317485,
"learning_rate": 2.885161764499105e-05,
"loss": 0.0,
"step": 157500
},
{
"epoch": 0.25222432320471755,
"grad_norm": 0.0004327596980147064,
"learning_rate": 2.8844387007055617e-05,
"loss": 0.0,
"step": 158000
},
{
"epoch": 0.25302250144270716,
"grad_norm": 0.0004614158533513546,
"learning_rate": 2.88371345889835e-05,
"loss": 0.0,
"step": 158500
},
{
"epoch": 0.25382067968069677,
"grad_norm": 0.007937498390674591,
"learning_rate": 2.8829860402184278e-05,
"loss": 0.0,
"step": 159000
},
{
"epoch": 0.2546188579186864,
"grad_norm": 0.00025344284949824214,
"learning_rate": 2.882256445810179e-05,
"loss": 0.0,
"step": 159500
},
{
"epoch": 0.255417036156676,
"grad_norm": 0.00045202774344943464,
"learning_rate": 2.881524676821408e-05,
"loss": 0.0,
"step": 160000
},
{
"epoch": 0.2562152143946656,
"grad_norm": 0.0005293320282362401,
"learning_rate": 2.880790734403342e-05,
"loss": 0.0,
"step": 160500
},
{
"epoch": 0.2570133926326552,
"grad_norm": 6070.3427734375,
"learning_rate": 2.8800546197106277e-05,
"loss": 0.0,
"step": 161000
},
{
"epoch": 0.2578115708706448,
"grad_norm": 0.0039901817217469215,
"learning_rate": 2.8793163339013275e-05,
"loss": 0.0,
"step": 161500
},
{
"epoch": 0.25860974910863443,
"grad_norm": 0.000935662304982543,
"learning_rate": 2.878575878136921e-05,
"loss": 0.0,
"step": 162000
},
{
"epoch": 0.25940792734662405,
"grad_norm": 0.00384966260753572,
"learning_rate": 2.8778332535823013e-05,
"loss": 0.0,
"step": 162500
},
{
"epoch": 0.26020610558461366,
"grad_norm": 0.000354414718458429,
"learning_rate": 2.8770884614057727e-05,
"loss": 0.0,
"step": 163000
},
{
"epoch": 0.26100428382260327,
"grad_norm": 0.0002711515699047595,
"learning_rate": 2.87634150277905e-05,
"loss": 0.0,
"step": 163500
},
{
"epoch": 0.2618024620605929,
"grad_norm": 0.0004056449397467077,
"learning_rate": 2.8755923788772574e-05,
"loss": 0.0,
"step": 164000
},
{
"epoch": 0.2626006402985825,
"grad_norm": 0.022129971534013748,
"learning_rate": 2.874841090878924e-05,
"loss": 0.0,
"step": 164500
},
{
"epoch": 0.26339881853657215,
"grad_norm": 0.00036545773036777973,
"learning_rate": 2.8740876399659837e-05,
"loss": 0.0,
"step": 165000
},
{
"epoch": 0.26419699677456177,
"grad_norm": 0.003474722383543849,
"learning_rate": 2.8733320273237744e-05,
"loss": 0.0,
"step": 165500
},
{
"epoch": 0.2649951750125514,
"grad_norm": 0.005221190862357616,
"learning_rate": 2.8725742541410327e-05,
"loss": 0.0,
"step": 166000
},
{
"epoch": 0.265793353250541,
"grad_norm": 0.0006509709637612104,
"learning_rate": 2.871814321609897e-05,
"loss": 0.0,
"step": 166500
},
{
"epoch": 0.2665915314885306,
"grad_norm": 0.00034523566137067974,
"learning_rate": 2.8710522309258996e-05,
"loss": 0.0,
"step": 167000
},
{
"epoch": 0.2673897097265202,
"grad_norm": 0.007011398207396269,
"learning_rate": 2.870287983287971e-05,
"loss": 0.0,
"step": 167500
},
{
"epoch": 0.2681878879645098,
"grad_norm": 0.0003220826911274344,
"learning_rate": 2.8695215798984326e-05,
"loss": 0.0,
"step": 168000
},
{
"epoch": 0.26898606620249943,
"grad_norm": 0.00026051796157844365,
"learning_rate": 2.8687530219629986e-05,
"loss": 0.0,
"step": 168500
},
{
"epoch": 0.26978424444048904,
"grad_norm": 0.005468637682497501,
"learning_rate": 2.8679823106907734e-05,
"loss": 0.0,
"step": 169000
},
{
"epoch": 0.27058242267847865,
"grad_norm": 0.0006140482728369534,
"learning_rate": 2.8672094472942476e-05,
"loss": 0.0,
"step": 169500
},
{
"epoch": 0.27138060091646826,
"grad_norm": 0.000635271891951561,
"learning_rate": 2.8664344329892976e-05,
"loss": 0.0,
"step": 170000
},
{
"epoch": 0.2721787791544579,
"grad_norm": 0.0008043874986469746,
"learning_rate": 2.8656572689951845e-05,
"loss": 0.0,
"step": 170500
},
{
"epoch": 0.2729769573924475,
"grad_norm": 3775.17041015625,
"learning_rate": 2.8648779565345512e-05,
"loss": 0.0,
"step": 171000
},
{
"epoch": 0.2737751356304371,
"grad_norm": 0.0005806323024444282,
"learning_rate": 2.8640964968334205e-05,
"loss": 0.0,
"step": 171500
},
{
"epoch": 0.2745733138684267,
"grad_norm": 0.0011638767318800092,
"learning_rate": 2.8633128911211924e-05,
"loss": 0.0,
"step": 172000
},
{
"epoch": 0.2753714921064163,
"grad_norm": 0.0003357531677465886,
"learning_rate": 2.862527140630644e-05,
"loss": 0.0,
"step": 172500
},
{
"epoch": 0.2761696703444059,
"grad_norm": 0.020955944433808327,
"learning_rate": 2.8617392465979268e-05,
"loss": 0.0,
"step": 173000
},
{
"epoch": 0.27696784858239554,
"grad_norm": 0.00024525824119336903,
"learning_rate": 2.8609492102625634e-05,
"loss": 0.0,
"step": 173500
},
{
"epoch": 0.27776602682038515,
"grad_norm": 0.0786820575594902,
"learning_rate": 2.8601570328674474e-05,
"loss": 0.0,
"step": 174000
},
{
"epoch": 0.27856420505837476,
"grad_norm": 0.0002496826637070626,
"learning_rate": 2.859362715658841e-05,
"loss": 0.0,
"step": 174500
},
{
"epoch": 0.27936238329636437,
"grad_norm": 0.00032802074565552175,
"learning_rate": 2.8585662598863728e-05,
"loss": 0.0,
"step": 175000
},
{
"epoch": 0.280160561534354,
"grad_norm": 0.0019494870211929083,
"learning_rate": 2.8577676668030345e-05,
"loss": 0.0,
"step": 175500
},
{
"epoch": 0.2809587397723436,
"grad_norm": 0.0005683369236066937,
"learning_rate": 2.856966937665182e-05,
"loss": 0.0,
"step": 176000
},
{
"epoch": 0.2817569180103332,
"grad_norm": 0.000460715004010126,
"learning_rate": 2.8561640737325308e-05,
"loss": 0.0,
"step": 176500
},
{
"epoch": 0.2825550962483228,
"grad_norm": 0.00047412581625394523,
"learning_rate": 2.8553590762681547e-05,
"loss": 0.0,
"step": 177000
},
{
"epoch": 0.2833532744863124,
"grad_norm": 0.00045982238953001797,
"learning_rate": 2.854551946538485e-05,
"loss": 0.0,
"step": 177500
},
{
"epoch": 0.28415145272430203,
"grad_norm": 0.44650372862815857,
"learning_rate": 2.8537426858133053e-05,
"loss": 0.0,
"step": 178000
},
{
"epoch": 0.28494963096229164,
"grad_norm": 0.0004917326150462031,
"learning_rate": 2.852931295365754e-05,
"loss": 0.0,
"step": 178500
},
{
"epoch": 0.28574780920028126,
"grad_norm": 0.0005965101881884038,
"learning_rate": 2.85211777647232e-05,
"loss": 0.0,
"step": 179000
},
{
"epoch": 0.28654598743827087,
"grad_norm": 0.004035876132547855,
"learning_rate": 2.8513021304128383e-05,
"loss": 0.0,
"step": 179500
},
{
"epoch": 0.2873441656762605,
"grad_norm": 0.0002739470510277897,
"learning_rate": 2.850484358470493e-05,
"loss": 0.0,
"step": 180000
},
{
"epoch": 0.28814234391425014,
"grad_norm": 7.400282859802246,
"learning_rate": 2.8496644619318112e-05,
"loss": 0.0,
"step": 180500
},
{
"epoch": 0.28894052215223975,
"grad_norm": 0.008809339255094528,
"learning_rate": 2.848842442086663e-05,
"loss": 0.0,
"step": 181000
},
{
"epoch": 0.28973870039022936,
"grad_norm": 0.005798212252557278,
"learning_rate": 2.848018300228259e-05,
"loss": 0.0,
"step": 181500
},
{
"epoch": 0.290536878628219,
"grad_norm": 506.3049011230469,
"learning_rate": 2.847192037653147e-05,
"loss": 0.0,
"step": 182000
},
{
"epoch": 0.2913350568662086,
"grad_norm": 1442.7161865234375,
"learning_rate": 2.846363655661213e-05,
"loss": 0.0,
"step": 182500
},
{
"epoch": 0.2921332351041982,
"grad_norm": 0.0002742527285590768,
"learning_rate": 2.845533155555676e-05,
"loss": 0.0,
"step": 183000
},
{
"epoch": 0.2929314133421878,
"grad_norm": 0.053963255137205124,
"learning_rate": 2.844700538643088e-05,
"loss": 0.0,
"step": 183500
},
{
"epoch": 0.2937295915801774,
"grad_norm": 0.00021619696053676307,
"learning_rate": 2.8438658062333298e-05,
"loss": 0.0,
"step": 184000
},
{
"epoch": 0.29452776981816703,
"grad_norm": 0.0017313063144683838,
"learning_rate": 2.843028959639612e-05,
"loss": 0.0,
"step": 184500
},
{
"epoch": 0.29532594805615664,
"grad_norm": 0.004092790186405182,
"learning_rate": 2.8421900001784705e-05,
"loss": 0.0,
"step": 185000
},
{
"epoch": 0.29612412629414625,
"grad_norm": 0.0004076190816704184,
"learning_rate": 2.8413489291697654e-05,
"loss": 0.0,
"step": 185500
},
{
"epoch": 0.29692230453213586,
"grad_norm": 0.00018880168499890715,
"learning_rate": 2.8405057479366783e-05,
"loss": 0.0,
"step": 186000
},
{
"epoch": 0.29772048277012547,
"grad_norm": 0.0006835302338004112,
"learning_rate": 2.8396604578057106e-05,
"loss": 0.0,
"step": 186500
},
{
"epoch": 0.2985186610081151,
"grad_norm": 0.000788278179243207,
"learning_rate": 2.838813060106682e-05,
"loss": 0.0,
"step": 187000
},
{
"epoch": 0.2993168392461047,
"grad_norm": 0.0005574611132033169,
"learning_rate": 2.837963556172728e-05,
"loss": 0.0,
"step": 187500
},
{
"epoch": 0.3001150174840943,
"grad_norm": 0.00031035192660056055,
"learning_rate": 2.8371119473402962e-05,
"loss": 0.0,
"step": 188000
},
{
"epoch": 0.3009131957220839,
"grad_norm": 0.0002816423657350242,
"learning_rate": 2.8362582349491475e-05,
"loss": 0.0,
"step": 188500
},
{
"epoch": 0.3017113739600735,
"grad_norm": 0.0002125926548615098,
"learning_rate": 2.8354024203423506e-05,
"loss": 0.0,
"step": 189000
},
{
"epoch": 0.30250955219806314,
"grad_norm": 0.001808375702239573,
"learning_rate": 2.8345445048662833e-05,
"loss": 0.0,
"step": 189500
},
{
"epoch": 0.30330773043605275,
"grad_norm": 2678.749267578125,
"learning_rate": 2.8336844898706263e-05,
"loss": 0.0,
"step": 190000
},
{
"epoch": 0.30410590867404236,
"grad_norm": 0.00032715877750888467,
"learning_rate": 2.8328223767083646e-05,
"loss": 0.0,
"step": 190500
},
{
"epoch": 0.30490408691203197,
"grad_norm": 0.0005066508892923594,
"learning_rate": 2.8319581667357835e-05,
"loss": 0.0,
"step": 191000
},
{
"epoch": 0.3057022651500216,
"grad_norm": 0.00035031078732572496,
"learning_rate": 2.831091861312468e-05,
"loss": 0.0,
"step": 191500
},
{
"epoch": 0.3065004433880112,
"grad_norm": 2.2488694190979004,
"learning_rate": 2.8302234618012987e-05,
"loss": 0.0,
"step": 192000
},
{
"epoch": 0.3072986216260008,
"grad_norm": 0.00026478810468688607,
"learning_rate": 2.8293529695684503e-05,
"loss": 0.0,
"step": 192500
},
{
"epoch": 0.3080967998639904,
"grad_norm": 0.0003047047066502273,
"learning_rate": 2.8284803859833914e-05,
"loss": 0.0,
"step": 193000
},
{
"epoch": 0.30889497810198,
"grad_norm": 0.00020288255473133177,
"learning_rate": 2.827605712418879e-05,
"loss": 0.0,
"step": 193500
},
{
"epoch": 0.30969315633996963,
"grad_norm": 0.0002386285923421383,
"learning_rate": 2.8267289502509593e-05,
"loss": 0.0,
"step": 194000
},
{
"epoch": 0.31049133457795924,
"grad_norm": 0.007014371454715729,
"learning_rate": 2.8258501008589643e-05,
"loss": 0.0,
"step": 194500
},
{
"epoch": 0.31128951281594885,
"grad_norm": 0.0012399045517668128,
"learning_rate": 2.8249691656255076e-05,
"loss": 0.0,
"step": 195000
},
{
"epoch": 0.31208769105393847,
"grad_norm": 0.006949532311409712,
"learning_rate": 2.8240861459364876e-05,
"loss": 0.0,
"step": 195500
},
{
"epoch": 0.3128858692919281,
"grad_norm": 0.011967489495873451,
"learning_rate": 2.823201043181079e-05,
"loss": 0.0,
"step": 196000
},
{
"epoch": 0.31368404752991774,
"grad_norm": 0.00042293136357329786,
"learning_rate": 2.8223138587517358e-05,
"loss": 0.0,
"step": 196500
},
{
"epoch": 0.31448222576790735,
"grad_norm": 0.00022384982730727643,
"learning_rate": 2.8214245940441855e-05,
"loss": 0.0,
"step": 197000
},
{
"epoch": 0.31528040400589696,
"grad_norm": 0.00030645483639091253,
"learning_rate": 2.820533250457429e-05,
"loss": 0.0,
"step": 197500
},
{
"epoch": 0.3160785822438866,
"grad_norm": 0.18542060256004333,
"learning_rate": 2.819639829393737e-05,
"loss": 0.0,
"step": 198000
},
{
"epoch": 0.3168767604818762,
"grad_norm": 0.013315930962562561,
"learning_rate": 2.81874433225865e-05,
"loss": 0.0,
"step": 198500
},
{
"epoch": 0.3176749387198658,
"grad_norm": 0.0004632196214515716,
"learning_rate": 2.817846760460972e-05,
"loss": 0.0,
"step": 199000
},
{
"epoch": 0.3184731169578554,
"grad_norm": 0.0004162040422670543,
"learning_rate": 2.816947115412774e-05,
"loss": 0.0,
"step": 199500
},
{
"epoch": 0.319271295195845,
"grad_norm": 0.008228735998272896,
"learning_rate": 2.8160453985293868e-05,
"loss": 0.0,
"step": 200000
},
{
"epoch": 0.32006947343383463,
"grad_norm": 0.002274709288030863,
"learning_rate": 2.8151416112294007e-05,
"loss": 0.0,
"step": 200500
},
{
"epoch": 0.32086765167182424,
"grad_norm": 0.006481232587248087,
"learning_rate": 2.8142357549346632e-05,
"loss": 0.0,
"step": 201000
},
{
"epoch": 0.32166582990981385,
"grad_norm": 0.0003591532295104116,
"learning_rate": 2.8133278310702778e-05,
"loss": 0.0,
"step": 201500
},
{
"epoch": 0.32246400814780346,
"grad_norm": 0.000833726953715086,
"learning_rate": 2.812417841064599e-05,
"loss": 0.0,
"step": 202000
},
{
"epoch": 0.32326218638579307,
"grad_norm": 0.008296859450638294,
"learning_rate": 2.8115057863492336e-05,
"loss": 0.0,
"step": 202500
},
{
"epoch": 0.3240603646237827,
"grad_norm": 0.0004818796587642282,
"learning_rate": 2.8105916683590356e-05,
"loss": 0.0,
"step": 203000
},
{
"epoch": 0.3248585428617723,
"grad_norm": 0.0006113914423622191,
"learning_rate": 2.8096754885321048e-05,
"loss": 0.0,
"step": 203500
},
{
"epoch": 0.3256567210997619,
"grad_norm": 0.001762945088557899,
"learning_rate": 2.808757248309785e-05,
"loss": 0.0,
"step": 204000
},
{
"epoch": 0.3264548993377515,
"grad_norm": 0.0005597475683316588,
"learning_rate": 2.8078369491366622e-05,
"loss": 0.0,
"step": 204500
},
{
"epoch": 0.3272530775757411,
"grad_norm": 0.0004654059885069728,
"learning_rate": 2.80691459246056e-05,
"loss": 0.0,
"step": 205000
},
{
"epoch": 0.32805125581373074,
"grad_norm": 0.01907452754676342,
"learning_rate": 2.8059901797325403e-05,
"loss": 0.0,
"step": 205500
},
{
"epoch": 0.32884943405172035,
"grad_norm": 0.0023259760346263647,
"learning_rate": 2.8050637124068985e-05,
"loss": 0.0,
"step": 206000
},
{
"epoch": 0.32964761228970996,
"grad_norm": 0.00024120333546306938,
"learning_rate": 2.8041351919411633e-05,
"loss": 0.0,
"step": 206500
},
{
"epoch": 0.33044579052769957,
"grad_norm": 0.00039336824556812644,
"learning_rate": 2.803204619796093e-05,
"loss": 0.0,
"step": 207000
},
{
"epoch": 0.3312439687656892,
"grad_norm": 0.00048360280925408006,
"learning_rate": 2.8022719974356725e-05,
"loss": 0.0,
"step": 207500
},
{
"epoch": 0.3320421470036788,
"grad_norm": 0.00030716744367964566,
"learning_rate": 2.8013373263271147e-05,
"loss": 0.0,
"step": 208000
},
{
"epoch": 0.3328403252416684,
"grad_norm": 0.0003900101291947067,
"learning_rate": 2.8004006079408534e-05,
"loss": 0.0,
"step": 208500
},
{
"epoch": 0.333638503479658,
"grad_norm": 0.0009112692205235362,
"learning_rate": 2.799461843750544e-05,
"loss": 0.0,
"step": 209000
},
{
"epoch": 0.3344366817176476,
"grad_norm": 0.002314511453732848,
"learning_rate": 2.7985210352330603e-05,
"loss": 0.0,
"step": 209500
},
{
"epoch": 0.33523485995563723,
"grad_norm": 0.0004494874447118491,
"learning_rate": 2.7975781838684925e-05,
"loss": 0.0,
"step": 210000
},
{
"epoch": 0.33603303819362684,
"grad_norm": 0.000736879650503397,
"learning_rate": 2.7966332911401435e-05,
"loss": 0.0,
"step": 210500
},
{
"epoch": 0.33683121643161645,
"grad_norm": 0.0036531416699290276,
"learning_rate": 2.7956863585345295e-05,
"loss": 0.0,
"step": 211000
},
{
"epoch": 0.33762939466960606,
"grad_norm": 0.00030530127696692944,
"learning_rate": 2.7947373875413744e-05,
"loss": 0.0,
"step": 211500
},
{
"epoch": 0.33842757290759573,
"grad_norm": 0.0014557373942807317,
"learning_rate": 2.79378637965361e-05,
"loss": 0.0,
"step": 212000
},
{
"epoch": 0.33922575114558534,
"grad_norm": 0.0008803669479675591,
"learning_rate": 2.7928333363673716e-05,
"loss": 0.0,
"step": 212500
},
{
"epoch": 0.34002392938357495,
"grad_norm": 0.00021209794795140624,
"learning_rate": 2.791878259181997e-05,
"loss": 0.0,
"step": 213000
},
{
"epoch": 0.34082210762156456,
"grad_norm": 0.0012858508853241801,
"learning_rate": 2.7909211496000238e-05,
"loss": 0.0,
"step": 213500
},
{
"epoch": 0.3416202858595542,
"grad_norm": 0.0002028387680184096,
"learning_rate": 2.7899620091271874e-05,
"loss": 0.0,
"step": 214000
},
{
"epoch": 0.3424184640975438,
"grad_norm": 0.000373926421161741,
"learning_rate": 2.789000839272417e-05,
"loss": 0.0,
"step": 214500
},
{
"epoch": 0.3432166423355334,
"grad_norm": 0.000344914966262877,
"learning_rate": 2.7880376415478354e-05,
"loss": 0.0,
"step": 215000
},
{
"epoch": 0.344014820573523,
"grad_norm": 0.0009230823488906026,
"learning_rate": 2.7870724174687565e-05,
"loss": 0.0,
"step": 215500
},
{
"epoch": 0.3448129988115126,
"grad_norm": 0.0006525011267513037,
"learning_rate": 2.7861051685536798e-05,
"loss": 0.0,
"step": 216000
},
{
"epoch": 0.3456111770495022,
"grad_norm": 0.0021701750811189413,
"learning_rate": 2.785135896324292e-05,
"loss": 0.0,
"step": 216500
},
{
"epoch": 0.34640935528749184,
"grad_norm": 0.0005550780915655196,
"learning_rate": 2.7841646023054628e-05,
"loss": 0.0,
"step": 217000
},
{
"epoch": 0.34720753352548145,
"grad_norm": 0.1462016999721527,
"learning_rate": 2.7831912880252417e-05,
"loss": 0.0,
"step": 217500
},
{
"epoch": 0.34800571176347106,
"grad_norm": 0.00038006107206456363,
"learning_rate": 2.7822159550148574e-05,
"loss": 0.0,
"step": 218000
},
{
"epoch": 0.34880389000146067,
"grad_norm": 0.0005914014764130116,
"learning_rate": 2.7812386048087145e-05,
"loss": 0.0,
"step": 218500
},
{
"epoch": 0.3496020682394503,
"grad_norm": 0.001984767848625779,
"learning_rate": 2.78025923894439e-05,
"loss": 0.0,
"step": 219000
},
{
"epoch": 0.3504002464774399,
"grad_norm": 0.001104337745346129,
"learning_rate": 2.779277858962633e-05,
"loss": 0.0,
"step": 219500
},
{
"epoch": 0.3511984247154295,
"grad_norm": 0.0004109264409635216,
"learning_rate": 2.7782944664073612e-05,
"loss": 0.0,
"step": 220000
},
{
"epoch": 0.3519966029534191,
"grad_norm": 0.0002343226078664884,
"learning_rate": 2.7773090628256574e-05,
"loss": 0.0,
"step": 220500
},
{
"epoch": 0.3527947811914087,
"grad_norm": 0.00041432067519053817,
"learning_rate": 2.77632164976777e-05,
"loss": 0.0,
"step": 221000
},
{
"epoch": 0.35359295942939833,
"grad_norm": 0.008808308281004429,
"learning_rate": 2.7753322287871073e-05,
"loss": 0.0,
"step": 221500
},
{
"epoch": 0.35439113766738795,
"grad_norm": 0.0015466894255951047,
"learning_rate": 2.774340801440236e-05,
"loss": 0.0,
"step": 222000
},
{
"epoch": 0.35518931590537756,
"grad_norm": 0.00031447981018573046,
"learning_rate": 2.773347369286882e-05,
"loss": 0.0,
"step": 222500
},
{
"epoch": 0.35598749414336717,
"grad_norm": 0.0005213140393607318,
"learning_rate": 2.7723519338899216e-05,
"loss": 0.0,
"step": 223000
},
{
"epoch": 0.3567856723813568,
"grad_norm": 0.0002478585811331868,
"learning_rate": 2.7713544968153853e-05,
"loss": 0.0,
"step": 223500
},
{
"epoch": 0.3575838506193464,
"grad_norm": 0.0010923146037384868,
"learning_rate": 2.7703550596324514e-05,
"loss": 0.0,
"step": 224000
},
{
"epoch": 0.358382028857336,
"grad_norm": 0.0004491193685680628,
"learning_rate": 2.769353623913445e-05,
"loss": 0.0,
"step": 224500
},
{
"epoch": 0.3591802070953256,
"grad_norm": 0.0028391792438924313,
"learning_rate": 2.7683501912338354e-05,
"loss": 0.0,
"step": 225000
},
{
"epoch": 0.3599783853333152,
"grad_norm": 0.0011303217615932226,
"learning_rate": 2.767344763172234e-05,
"loss": 0.0,
"step": 225500
},
{
"epoch": 0.36077656357130483,
"grad_norm": 0.00829398538917303,
"learning_rate": 2.7663373413103904e-05,
"loss": 0.0,
"step": 226000
},
{
"epoch": 0.36157474180929444,
"grad_norm": 0.00026492562028579414,
"learning_rate": 2.7653279272331912e-05,
"loss": 0.0,
"step": 226500
},
{
"epoch": 0.36237292004728405,
"grad_norm": 0.00034646346466615796,
"learning_rate": 2.764316522528658e-05,
"loss": 0.0,
"step": 227000
},
{
"epoch": 0.3631710982852737,
"grad_norm": 0.004864424932748079,
"learning_rate": 2.7633031287879434e-05,
"loss": 0.0,
"step": 227500
},
{
"epoch": 0.36396927652326333,
"grad_norm": 0.00020658916037064046,
"learning_rate": 2.7622877476053285e-05,
"loss": 0.0,
"step": 228000
},
{
"epoch": 0.36476745476125294,
"grad_norm": 0.00030613088165409863,
"learning_rate": 2.7612703805782225e-05,
"loss": 0.0,
"step": 228500
},
{
"epoch": 0.36556563299924255,
"grad_norm": 0.00909386295825243,
"learning_rate": 2.760251029307157e-05,
"loss": 0.0,
"step": 229000
},
{
"epoch": 0.36636381123723216,
"grad_norm": 0.020065903663635254,
"learning_rate": 2.7592296953957876e-05,
"loss": 0.0,
"step": 229500
},
{
"epoch": 0.3671619894752218,
"grad_norm": 0.00014274036220740527,
"learning_rate": 2.7582063804508868e-05,
"loss": 0.0,
"step": 230000
},
{
"epoch": 0.3679601677132114,
"grad_norm": 0.00015419685223605484,
"learning_rate": 2.7571810860823443e-05,
"loss": 0.0,
"step": 230500
},
{
"epoch": 0.368758345951201,
"grad_norm": 0.00025692241615615785,
"learning_rate": 2.7561538139031653e-05,
"loss": 0.0,
"step": 231000
},
{
"epoch": 0.3695565241891906,
"grad_norm": 0.0001514313480583951,
"learning_rate": 2.7551245655294637e-05,
"loss": 0.0,
"step": 231500
},
{
"epoch": 0.3703547024271802,
"grad_norm": 0.0005323386285454035,
"learning_rate": 2.7540933425804655e-05,
"loss": 0.0,
"step": 232000
},
{
"epoch": 0.3711528806651698,
"grad_norm": 0.029986457899212837,
"learning_rate": 2.7530601466785003e-05,
"loss": 0.0,
"step": 232500
},
{
"epoch": 0.37195105890315944,
"grad_norm": 530.8088989257812,
"learning_rate": 2.752024979449004e-05,
"loss": 0.0,
"step": 233000
},
{
"epoch": 0.37274923714114905,
"grad_norm": 0.0007142575341276824,
"learning_rate": 2.7509878425205117e-05,
"loss": 0.0,
"step": 233500
},
{
"epoch": 0.37354741537913866,
"grad_norm": 0.00024393905187025666,
"learning_rate": 2.7499487375246588e-05,
"loss": 0.0,
"step": 234000
},
{
"epoch": 0.37434559361712827,
"grad_norm": 0.00046698853839188814,
"learning_rate": 2.7489076660961762e-05,
"loss": 0.0,
"step": 234500
},
{
"epoch": 0.3751437718551179,
"grad_norm": 3171.53173828125,
"learning_rate": 2.7478646298728884e-05,
"loss": 0.0,
"step": 235000
},
{
"epoch": 0.3759419500931075,
"grad_norm": 0.0002929008915089071,
"learning_rate": 2.7468196304957114e-05,
"loss": 0.0,
"step": 235500
},
{
"epoch": 0.3767401283310971,
"grad_norm": 0.00025504553923383355,
"learning_rate": 2.7457726696086486e-05,
"loss": 0.0,
"step": 236000
},
{
"epoch": 0.3775383065690867,
"grad_norm": 0.0002734732406679541,
"learning_rate": 2.744723748858791e-05,
"loss": 0.0,
"step": 236500
},
{
"epoch": 0.3783364848070763,
"grad_norm": 0.0002443444973323494,
"learning_rate": 2.7436728698963115e-05,
"loss": 0.0,
"step": 237000
},
{
"epoch": 0.37913466304506593,
"grad_norm": 0.00032405051751993597,
"learning_rate": 2.742620034374463e-05,
"loss": 0.0,
"step": 237500
},
{
"epoch": 0.37993284128305554,
"grad_norm": 0.0015245258109644055,
"learning_rate": 2.7415652439495792e-05,
"loss": 0.0,
"step": 238000
},
{
"epoch": 0.38073101952104516,
"grad_norm": 0.0013498624321073294,
"learning_rate": 2.7405085002810664e-05,
"loss": 0.0,
"step": 238500
},
{
"epoch": 0.38152919775903477,
"grad_norm": 0.00023179441632237285,
"learning_rate": 2.739449805031406e-05,
"loss": 0.0,
"step": 239000
},
{
"epoch": 0.3823273759970244,
"grad_norm": 0.0005060135736130178,
"learning_rate": 2.7383891598661473e-05,
"loss": 0.0,
"step": 239500
},
{
"epoch": 0.383125554235014,
"grad_norm": 0.001060970826074481,
"learning_rate": 2.7373265664539094e-05,
"loss": 0.0,
"step": 240000
},
{
"epoch": 0.3839237324730036,
"grad_norm": 0.0004396582953631878,
"learning_rate": 2.7362620264663755e-05,
"loss": 0.0,
"step": 240500
},
{
"epoch": 0.3847219107109932,
"grad_norm": 0.0010933999437838793,
"learning_rate": 2.735195541578291e-05,
"loss": 0.0,
"step": 241000
},
{
"epoch": 0.3855200889489828,
"grad_norm": 0.0002554966777097434,
"learning_rate": 2.7341271134674613e-05,
"loss": 0.0,
"step": 241500
},
{
"epoch": 0.38631826718697243,
"grad_norm": 0.0016024510841816664,
"learning_rate": 2.7330567438147493e-05,
"loss": 0.0,
"step": 242000
},
{
"epoch": 0.38711644542496204,
"grad_norm": 0.0025034185964614153,
"learning_rate": 2.7319844343040706e-05,
"loss": 0.0,
"step": 242500
},
{
"epoch": 0.38791462366295165,
"grad_norm": 0.00293533387593925,
"learning_rate": 2.7309101866223954e-05,
"loss": 0.0,
"step": 243000
},
{
"epoch": 0.3887128019009413,
"grad_norm": 0.0003197678888682276,
"learning_rate": 2.7298340024597412e-05,
"loss": 0.0,
"step": 243500
},
{
"epoch": 0.38951098013893093,
"grad_norm": 0.0006886592600494623,
"learning_rate": 2.7287558835091715e-05,
"loss": 0.0,
"step": 244000
},
{
"epoch": 0.39030915837692054,
"grad_norm": 0.0002249764947919175,
"learning_rate": 2.7276758314667954e-05,
"loss": 0.0,
"step": 244500
},
{
"epoch": 0.39110733661491015,
"grad_norm": 0.26254597306251526,
"learning_rate": 2.7265938480317622e-05,
"loss": 0.0,
"step": 245000
},
{
"epoch": 0.39190551485289976,
"grad_norm": 0.0028902171179652214,
"learning_rate": 2.7255099349062593e-05,
"loss": 0.0,
"step": 245500
},
{
"epoch": 0.39270369309088937,
"grad_norm": 0.00023200709256343544,
"learning_rate": 2.7244240937955106e-05,
"loss": 0.0,
"step": 246000
},
{
"epoch": 0.393501871328879,
"grad_norm": 0.0006069283117540181,
"learning_rate": 2.7233363264077725e-05,
"loss": 0.0,
"step": 246500
},
{
"epoch": 0.3943000495668686,
"grad_norm": 0.0007884249207563698,
"learning_rate": 2.722246634454333e-05,
"loss": 0.0,
"step": 247000
},
{
"epoch": 0.3950982278048582,
"grad_norm": 0.0011736562009900808,
"learning_rate": 2.7211550196495058e-05,
"loss": 0.0,
"step": 247500
},
{
"epoch": 0.3958964060428478,
"grad_norm": 0.0001821365876821801,
"learning_rate": 2.7200614837106324e-05,
"loss": 0.0,
"step": 248000
},
{
"epoch": 0.3966945842808374,
"grad_norm": 0.0002176285779569298,
"learning_rate": 2.7189660283580738e-05,
"loss": 0.0,
"step": 248500
},
{
"epoch": 0.39749276251882704,
"grad_norm": 0.0016040855553001165,
"learning_rate": 2.7178686553152128e-05,
"loss": 0.0,
"step": 249000
},
{
"epoch": 0.39829094075681665,
"grad_norm": 0.0006293723708949983,
"learning_rate": 2.7167693663084484e-05,
"loss": 0.0,
"step": 249500
},
{
"epoch": 0.39908911899480626,
"grad_norm": 0.000314537959638983,
"learning_rate": 2.7156681630671932e-05,
"loss": 0.0,
"step": 250000
},
{
"epoch": 0.39988729723279587,
"grad_norm": 0.00024686212418600917,
"learning_rate": 2.7145650473238724e-05,
"loss": 0.0,
"step": 250500
},
{
"epoch": 0.4006854754707855,
"grad_norm": 0.00021431001368910074,
"learning_rate": 2.713460020813919e-05,
"loss": 0.0,
"step": 251000
},
{
"epoch": 0.4014836537087751,
"grad_norm": 52.81241989135742,
"learning_rate": 2.7123530852757722e-05,
"loss": 0.0,
"step": 251500
},
{
"epoch": 0.4022818319467647,
"grad_norm": 0.00014636837295256555,
"learning_rate": 2.711244242450876e-05,
"loss": 0.0,
"step": 252000
},
{
"epoch": 0.4030800101847543,
"grad_norm": 0.0008935470250435174,
"learning_rate": 2.710133494083672e-05,
"loss": 0.0,
"step": 252500
},
{
"epoch": 0.4038781884227439,
"grad_norm": 0.00020280707394704223,
"learning_rate": 2.7090208419216022e-05,
"loss": 0.0,
"step": 253000
},
{
"epoch": 0.40467636666073353,
"grad_norm": 509.4928283691406,
"learning_rate": 2.707906287715103e-05,
"loss": 0.0,
"step": 253500
},
{
"epoch": 0.40547454489872314,
"grad_norm": 0.0008828208665363491,
"learning_rate": 2.7067898332176025e-05,
"loss": 0.0,
"step": 254000
},
{
"epoch": 0.40627272313671275,
"grad_norm": 0.00014347408432513475,
"learning_rate": 2.705671480185519e-05,
"loss": 0.0,
"step": 254500
},
{
"epoch": 0.40707090137470237,
"grad_norm": 0.0016916063614189625,
"learning_rate": 2.7045512303782576e-05,
"loss": 0.0,
"step": 255000
},
{
"epoch": 0.407869079612692,
"grad_norm": 0.00024378852685913444,
"learning_rate": 2.7034290855582063e-05,
"loss": 0.0,
"step": 255500
},
{
"epoch": 0.4086672578506816,
"grad_norm": 0.000262060813838616,
"learning_rate": 2.7023050474907364e-05,
"loss": 0.0,
"step": 256000
},
{
"epoch": 0.4094654360886712,
"grad_norm": 0.0003295161877758801,
"learning_rate": 2.7011791179441954e-05,
"loss": 0.0,
"step": 256500
},
{
"epoch": 0.4102636143266608,
"grad_norm": 0.0004501194052863866,
"learning_rate": 2.7000512986899083e-05,
"loss": 0.0,
"step": 257000
},
{
"epoch": 0.4110617925646504,
"grad_norm": 0.00023809456615708768,
"learning_rate": 2.6989215915021727e-05,
"loss": 0.0,
"step": 257500
},
{
"epoch": 0.41185997080264003,
"grad_norm": 0.000276111182756722,
"learning_rate": 2.697789998158255e-05,
"loss": 0.0,
"step": 258000
},
{
"epoch": 0.41265814904062964,
"grad_norm": 0.00017680577002465725,
"learning_rate": 2.6966565204383905e-05,
"loss": 0.0,
"step": 258500
},
{
"epoch": 0.4134563272786193,
"grad_norm": 0.00023531143961008638,
"learning_rate": 2.695521160125778e-05,
"loss": 0.0,
"step": 259000
},
{
"epoch": 0.4142545055166089,
"grad_norm": 0.000134236179292202,
"learning_rate": 2.694383919006579e-05,
"loss": 0.0,
"step": 259500
},
{
"epoch": 0.41505268375459853,
"grad_norm": 1656.9716796875,
"learning_rate": 2.6932447988699128e-05,
"loss": 0.0,
"step": 260000
},
{
"epoch": 0.41585086199258814,
"grad_norm": 0.0007973301107995212,
"learning_rate": 2.6921038015078554e-05,
"loss": 0.0,
"step": 260500
},
{
"epoch": 0.41664904023057775,
"grad_norm": 0.0007964337710291147,
"learning_rate": 2.690960928715436e-05,
"loss": 0.0001,
"step": 261000
},
{
"epoch": 0.41744721846856736,
"grad_norm": 0.0003351388149894774,
"learning_rate": 2.6898161822906345e-05,
"loss": 0.0,
"step": 261500
},
{
"epoch": 0.41824539670655697,
"grad_norm": 0.0008457360090687871,
"learning_rate": 2.6886695640343773e-05,
"loss": 0.0,
"step": 262000
},
{
"epoch": 0.4190435749445466,
"grad_norm": 0.00018712795281317085,
"learning_rate": 2.6875210757505373e-05,
"loss": 0.0,
"step": 262500
},
{
"epoch": 0.4198417531825362,
"grad_norm": 0.00024416804080829024,
"learning_rate": 2.686370719245928e-05,
"loss": 0.0,
"step": 263000
},
{
"epoch": 0.4206399314205258,
"grad_norm": 0.0003195313038304448,
"learning_rate": 2.685218496330303e-05,
"loss": 0.0,
"step": 263500
},
{
"epoch": 0.4214381096585154,
"grad_norm": 0.00019497050379868597,
"learning_rate": 2.6840644088163508e-05,
"loss": 0.0,
"step": 264000
},
{
"epoch": 0.422236287896505,
"grad_norm": 0.004070492926985025,
"learning_rate": 2.6829084585196943e-05,
"loss": 0.0,
"step": 264500
},
{
"epoch": 0.42303446613449464,
"grad_norm": 0.0002645227941684425,
"learning_rate": 2.6817506472588872e-05,
"loss": 0.0,
"step": 265000
},
{
"epoch": 0.42383264437248425,
"grad_norm": 0.00046545593068003654,
"learning_rate": 2.6805909768554106e-05,
"loss": 0.0,
"step": 265500
},
{
"epoch": 0.42463082261047386,
"grad_norm": 0.00025627054856158793,
"learning_rate": 2.6794294491336703e-05,
"loss": 0.0,
"step": 266000
},
{
"epoch": 0.42542900084846347,
"grad_norm": 0.0002409874286968261,
"learning_rate": 2.6782660659209935e-05,
"loss": 0.0,
"step": 266500
},
{
"epoch": 0.4262271790864531,
"grad_norm": 0.0008856968488544226,
"learning_rate": 2.6771008290476268e-05,
"loss": 0.0,
"step": 267000
},
{
"epoch": 0.4270253573244427,
"grad_norm": 0.0036024507135152817,
"learning_rate": 2.6759337403467344e-05,
"loss": 0.0,
"step": 267500
},
{
"epoch": 0.4278235355624323,
"grad_norm": 0.0002771168656181544,
"learning_rate": 2.6747648016543918e-05,
"loss": 0.0,
"step": 268000
},
{
"epoch": 0.4286217138004219,
"grad_norm": 2055.608642578125,
"learning_rate": 2.6735940148095856e-05,
"loss": 0.0,
"step": 268500
},
{
"epoch": 0.4294198920384115,
"grad_norm": 0.00016251685156021267,
"learning_rate": 2.6724213816542105e-05,
"loss": 0.0,
"step": 269000
},
{
"epoch": 0.43021807027640113,
"grad_norm": 0.0016744077438488603,
"learning_rate": 2.6712469040330658e-05,
"loss": 0.0,
"step": 269500
},
{
"epoch": 0.43101624851439074,
"grad_norm": 0.0008171962690539658,
"learning_rate": 2.670070583793851e-05,
"loss": 0.0,
"step": 270000
},
{
"epoch": 0.43181442675238035,
"grad_norm": 0.0004002843634225428,
"learning_rate": 2.6688924227871667e-05,
"loss": 0.0,
"step": 270500
},
{
"epoch": 0.43261260499036996,
"grad_norm": 0.0014142803847789764,
"learning_rate": 2.667712422866508e-05,
"loss": 0.0,
"step": 271000
},
{
"epoch": 0.4334107832283596,
"grad_norm": 0.0025327634066343307,
"learning_rate": 2.6665305858882637e-05,
"loss": 0.0,
"step": 271500
},
{
"epoch": 0.4342089614663492,
"grad_norm": 0.0027283141389489174,
"learning_rate": 2.665346913711711e-05,
"loss": 0.0,
"step": 272000
},
{
"epoch": 0.4350071397043388,
"grad_norm": 0.00230594165623188,
"learning_rate": 2.6641614081990168e-05,
"loss": 0.0,
"step": 272500
},
{
"epoch": 0.4358053179423284,
"grad_norm": 0.00019310094648972154,
"learning_rate": 2.6629740712152305e-05,
"loss": 0.0,
"step": 273000
},
{
"epoch": 0.436603496180318,
"grad_norm": 0.00035802560159936547,
"learning_rate": 2.661784904628283e-05,
"loss": 0.0,
"step": 273500
},
{
"epoch": 0.43740167441830763,
"grad_norm": 0.0004388946108520031,
"learning_rate": 2.6605939103089848e-05,
"loss": 0.0,
"step": 274000
},
{
"epoch": 0.43819985265629724,
"grad_norm": 0.0002641767496243119,
"learning_rate": 2.6594010901310196e-05,
"loss": 0.0,
"step": 274500
},
{
"epoch": 0.4389980308942869,
"grad_norm": 0.002404822502285242,
"learning_rate": 2.658206445970945e-05,
"loss": 0.0,
"step": 275000
},
{
"epoch": 0.4397962091322765,
"grad_norm": 0.011408819817006588,
"learning_rate": 2.6570099797081885e-05,
"loss": 0.0,
"step": 275500
},
{
"epoch": 0.4405943873702661,
"grad_norm": 7161.9951171875,
"learning_rate": 2.6558116932250428e-05,
"loss": 0.0,
"step": 276000
},
{
"epoch": 0.44139256560825574,
"grad_norm": 0.0002718472678679973,
"learning_rate": 2.654611588406666e-05,
"loss": 0.0,
"step": 276500
},
{
"epoch": 0.44219074384624535,
"grad_norm": 0.8729614019393921,
"learning_rate": 2.6534096671410745e-05,
"loss": 0.0,
"step": 277000
},
{
"epoch": 0.44298892208423496,
"grad_norm": 0.00037378541310317814,
"learning_rate": 2.652205931319144e-05,
"loss": 0.0,
"step": 277500
},
{
"epoch": 0.44378710032222457,
"grad_norm": 0.002113162772729993,
"learning_rate": 2.6510003828346052e-05,
"loss": 0.0,
"step": 278000
},
{
"epoch": 0.4445852785602142,
"grad_norm": 0.0005606426857411861,
"learning_rate": 2.649793023584039e-05,
"loss": 0.0,
"step": 278500
},
{
"epoch": 0.4453834567982038,
"grad_norm": 0.0006437928532250226,
"learning_rate": 2.6485838554668765e-05,
"loss": 0.0,
"step": 279000
},
{
"epoch": 0.4461816350361934,
"grad_norm": 0.0003039956500288099,
"learning_rate": 2.6473728803853925e-05,
"loss": 0.0,
"step": 279500
},
{
"epoch": 0.446979813274183,
"grad_norm": 0.01145413052290678,
"learning_rate": 2.646160100244707e-05,
"loss": 0.0,
"step": 280000
},
{
"epoch": 0.4477779915121726,
"grad_norm": 0.010469400323927402,
"learning_rate": 2.6449455169527788e-05,
"loss": 0.0,
"step": 280500
},
{
"epoch": 0.44857616975016223,
"grad_norm": 0.00035089420271106064,
"learning_rate": 2.643729132420402e-05,
"loss": 0.0,
"step": 281000
},
{
"epoch": 0.44937434798815185,
"grad_norm": 0.00039669257239438593,
"learning_rate": 2.6425109485612066e-05,
"loss": 0.0,
"step": 281500
},
{
"epoch": 0.45017252622614146,
"grad_norm": 0.00023531325859948993,
"learning_rate": 2.6412909672916523e-05,
"loss": 0.0,
"step": 282000
},
{
"epoch": 0.45097070446413107,
"grad_norm": 0.00033051602076739073,
"learning_rate": 2.6400691905310262e-05,
"loss": 0.0,
"step": 282500
},
{
"epoch": 0.4517688827021207,
"grad_norm": 0.00016060993948485702,
"learning_rate": 2.638845620201441e-05,
"loss": 0.0,
"step": 283000
},
{
"epoch": 0.4525670609401103,
"grad_norm": 0.11830911040306091,
"learning_rate": 2.6376202582278307e-05,
"loss": 0.0,
"step": 283500
},
{
"epoch": 0.4533652391780999,
"grad_norm": 0.0002819143410306424,
"learning_rate": 2.636393106537947e-05,
"loss": 0.0,
"step": 284000
},
{
"epoch": 0.4541634174160895,
"grad_norm": 0.00021263032977003604,
"learning_rate": 2.6351641670623583e-05,
"loss": 0.0,
"step": 284500
},
{
"epoch": 0.4549615956540791,
"grad_norm": 0.0021154058631509542,
"learning_rate": 2.633933441734445e-05,
"loss": 0.0,
"step": 285000
},
{
"epoch": 0.45575977389206873,
"grad_norm": 58.90241622924805,
"learning_rate": 2.6327009324903978e-05,
"loss": 0.0,
"step": 285500
},
{
"epoch": 0.45655795213005834,
"grad_norm": 0.003388627665117383,
"learning_rate": 2.631466641269213e-05,
"loss": 0.0,
"step": 286000
},
{
"epoch": 0.45735613036804795,
"grad_norm": 0.00012820272240787745,
"learning_rate": 2.6302305700126908e-05,
"loss": 0.0,
"step": 286500
},
{
"epoch": 0.45815430860603756,
"grad_norm": 0.000897783029358834,
"learning_rate": 2.6289927206654315e-05,
"loss": 0.0,
"step": 287000
},
{
"epoch": 0.4589524868440272,
"grad_norm": 0.04280461370944977,
"learning_rate": 2.627753095174833e-05,
"loss": 0.0,
"step": 287500
},
{
"epoch": 0.4597506650820168,
"grad_norm": 0.0004180770483799279,
"learning_rate": 2.6265116954910868e-05,
"loss": 0.0,
"step": 288000
},
{
"epoch": 0.4605488433200064,
"grad_norm": 0.00025683449348434806,
"learning_rate": 2.625268523567177e-05,
"loss": 0.0,
"step": 288500
},
{
"epoch": 0.461347021557996,
"grad_norm": 504.5489196777344,
"learning_rate": 2.6240235813588738e-05,
"loss": 0.0,
"step": 289000
},
{
"epoch": 0.4621451997959856,
"grad_norm": 0.0005199440638534725,
"learning_rate": 2.6227768708247343e-05,
"loss": 0.0,
"step": 289500
},
{
"epoch": 0.46294337803397523,
"grad_norm": 0.04056164249777794,
"learning_rate": 2.6215283939260964e-05,
"loss": 0.0,
"step": 290000
},
{
"epoch": 0.4637415562719649,
"grad_norm": 0.0003166797396261245,
"learning_rate": 2.6202781526270773e-05,
"loss": 0.0,
"step": 290500
},
{
"epoch": 0.4645397345099545,
"grad_norm": 0.008205456659197807,
"learning_rate": 2.61902614889457e-05,
"loss": 0.0,
"step": 291000
},
{
"epoch": 0.4653379127479441,
"grad_norm": 0.004406485706567764,
"learning_rate": 2.6177723846982398e-05,
"loss": 0.0,
"step": 291500
},
{
"epoch": 0.4661360909859337,
"grad_norm": 0.007951625622808933,
"learning_rate": 2.6165168620105222e-05,
"loss": 0.0,
"step": 292000
},
{
"epoch": 0.46693426922392334,
"grad_norm": 0.00030562348547391593,
"learning_rate": 2.6152595828066183e-05,
"loss": 0.0,
"step": 292500
},
{
"epoch": 0.46773244746191295,
"grad_norm": 0.0004488139820750803,
"learning_rate": 2.6140005490644937e-05,
"loss": 0.0,
"step": 293000
},
{
"epoch": 0.46853062569990256,
"grad_norm": 0.00018583855126053095,
"learning_rate": 2.6127397627648736e-05,
"loss": 0.0,
"step": 293500
},
{
"epoch": 0.46932880393789217,
"grad_norm": 0.1338287740945816,
"learning_rate": 2.6114772258912394e-05,
"loss": 0.0,
"step": 294000
},
{
"epoch": 0.4701269821758818,
"grad_norm": 3.4950578212738037,
"learning_rate": 2.610212940429829e-05,
"loss": 0.0,
"step": 294500
},
{
"epoch": 0.4709251604138714,
"grad_norm": 0.000191990053281188,
"learning_rate": 2.6089469083696288e-05,
"loss": 0.0,
"step": 295000
},
{
"epoch": 0.471723338651861,
"grad_norm": 0.00013626097643282264,
"learning_rate": 2.607679131702374e-05,
"loss": 0.0,
"step": 295500
},
{
"epoch": 0.4725215168898506,
"grad_norm": 0.0008477133233100176,
"learning_rate": 2.6064096124225448e-05,
"loss": 0.0,
"step": 296000
},
{
"epoch": 0.4733196951278402,
"grad_norm": 0.06711713969707489,
"learning_rate": 2.6051383525273614e-05,
"loss": 0.0,
"step": 296500
},
{
"epoch": 0.47411787336582983,
"grad_norm": 0.00024246216344181448,
"learning_rate": 2.6038653540167845e-05,
"loss": 0.0,
"step": 297000
},
{
"epoch": 0.47491605160381944,
"grad_norm": 0.0030509470961987972,
"learning_rate": 2.6025906188935084e-05,
"loss": 0.0,
"step": 297500
},
{
"epoch": 0.47571422984180906,
"grad_norm": 0.004496394656598568,
"learning_rate": 2.6013141491629597e-05,
"loss": 0.0,
"step": 298000
},
{
"epoch": 0.47651240807979867,
"grad_norm": 0.000243777220021002,
"learning_rate": 2.600035946833294e-05,
"loss": 0.0,
"step": 298500
},
{
"epoch": 0.4773105863177883,
"grad_norm": 0.06534085422754288,
"learning_rate": 2.5987560139153936e-05,
"loss": 0.0,
"step": 299000
},
{
"epoch": 0.4781087645557779,
"grad_norm": 0.001955215120688081,
"learning_rate": 2.5974743524228625e-05,
"loss": 0.0,
"step": 299500
},
{
"epoch": 0.4789069427937675,
"grad_norm": 0.01624520681798458,
"learning_rate": 2.596190964372023e-05,
"loss": 0.0,
"step": 300000
},
{
"epoch": 0.4789069427937675,
"eval_loss": 1.7790502170100808e-05,
"eval_runtime": 21821.523,
"eval_samples_per_second": 102.069,
"eval_steps_per_second": 3.19,
"step": 300000
},
{
"epoch": 0.4797051210317571,
"grad_norm": 0.5646551251411438,
"learning_rate": 2.5949058517819156e-05,
"loss": 0.0,
"step": 300500
},
{
"epoch": 0.4805032992697467,
"grad_norm": 0.7845109701156616,
"learning_rate": 2.5936190166742935e-05,
"loss": 0.0,
"step": 301000
},
{
"epoch": 0.48130147750773633,
"grad_norm": 0.0011155412066727877,
"learning_rate": 2.592330461073619e-05,
"loss": 0.0,
"step": 301500
},
{
"epoch": 0.48209965574572594,
"grad_norm": 119.20901489257812,
"learning_rate": 2.591040187007061e-05,
"loss": 0.0,
"step": 302000
},
{
"epoch": 0.48289783398371555,
"grad_norm": 0.0005831182352267206,
"learning_rate": 2.589748196504493e-05,
"loss": 0.0,
"step": 302500
},
{
"epoch": 0.48369601222170516,
"grad_norm": 0.0002959502162411809,
"learning_rate": 2.5884544915984875e-05,
"loss": 0.0,
"step": 303000
},
{
"epoch": 0.4844941904596948,
"grad_norm": 0.23617546260356903,
"learning_rate": 2.587159074324316e-05,
"loss": 0.0,
"step": 303500
},
{
"epoch": 0.4852923686976844,
"grad_norm": 0.0008308735559694469,
"learning_rate": 2.5858619467199415e-05,
"loss": 0.0,
"step": 304000
},
{
"epoch": 0.486090546935674,
"grad_norm": 0.0004012871941085905,
"learning_rate": 2.584563110826019e-05,
"loss": 0.0,
"step": 304500
},
{
"epoch": 0.4868887251736636,
"grad_norm": 0.0012458977289497852,
"learning_rate": 2.5832625686858918e-05,
"loss": 0.0,
"step": 305000
},
{
"epoch": 0.4876869034116532,
"grad_norm": 0.00029142654966562986,
"learning_rate": 2.5819603223455854e-05,
"loss": 0.0,
"step": 305500
},
{
"epoch": 0.4884850816496429,
"grad_norm": 0.00011621385056059808,
"learning_rate": 2.5806563738538086e-05,
"loss": 0.0,
"step": 306000
},
{
"epoch": 0.4892832598876325,
"grad_norm": 0.0011832008603960276,
"learning_rate": 2.5793507252619474e-05,
"loss": 0.0,
"step": 306500
},
{
"epoch": 0.4900814381256221,
"grad_norm": 0.0010382416658103466,
"learning_rate": 2.5780433786240605e-05,
"loss": 0.0,
"step": 307000
},
{
"epoch": 0.4908796163636117,
"grad_norm": 0.0004220962291583419,
"learning_rate": 2.5767343359968803e-05,
"loss": 0.0,
"step": 307500
},
{
"epoch": 0.4916777946016013,
"grad_norm": 0.00026486560818739235,
"learning_rate": 2.5754235994398073e-05,
"loss": 0.0,
"step": 308000
},
{
"epoch": 0.49247597283959094,
"grad_norm": 0.00047953566536307335,
"learning_rate": 2.574111171014905e-05,
"loss": 0.0,
"step": 308500
},
{
"epoch": 0.49327415107758055,
"grad_norm": 0.0001465219829697162,
"learning_rate": 2.5727970527868998e-05,
"loss": 0.0,
"step": 309000
},
{
"epoch": 0.49407232931557016,
"grad_norm": 0.0002420053497189656,
"learning_rate": 2.5714812468231773e-05,
"loss": 0.0,
"step": 309500
},
{
"epoch": 0.49487050755355977,
"grad_norm": 0.00032595338416285813,
"learning_rate": 2.5701637551937767e-05,
"loss": 0.0,
"step": 310000
},
{
"epoch": 0.4956686857915494,
"grad_norm": 0.00023393328592646867,
"learning_rate": 2.5688445799713905e-05,
"loss": 0.0,
"step": 310500
},
{
"epoch": 0.496466864029539,
"grad_norm": 0.00023959919053595513,
"learning_rate": 2.5675237232313584e-05,
"loss": 0.0,
"step": 311000
},
{
"epoch": 0.4972650422675286,
"grad_norm": 0.00025843450566753745,
"learning_rate": 2.5662011870516667e-05,
"loss": 0.0,
"step": 311500
},
{
"epoch": 0.4980632205055182,
"grad_norm": 0.0003428571508266032,
"learning_rate": 2.5648769735129435e-05,
"loss": 0.0,
"step": 312000
},
{
"epoch": 0.4988613987435078,
"grad_norm": 0.0012112685944885015,
"learning_rate": 2.5635510846984554e-05,
"loss": 0.0,
"step": 312500
},
{
"epoch": 0.49965957698149743,
"grad_norm": 0.00033978992723859847,
"learning_rate": 2.5622235226941047e-05,
"loss": 0.0,
"step": 313000
},
{
"epoch": 0.500457755219487,
"grad_norm": 0.00017820294306147844,
"learning_rate": 2.560894289588426e-05,
"loss": 0.0,
"step": 313500
},
{
"epoch": 0.5012559334574767,
"grad_norm": 0.00042557052802294493,
"learning_rate": 2.5595633874725832e-05,
"loss": 0.0,
"step": 314000
},
{
"epoch": 0.5020541116954663,
"grad_norm": 0.00031017063884064555,
"learning_rate": 2.5582308184403653e-05,
"loss": 0.0,
"step": 314500
},
{
"epoch": 0.5028522899334559,
"grad_norm": 0.00034823661553673446,
"learning_rate": 2.5568965845881843e-05,
"loss": 0.0,
"step": 315000
},
{
"epoch": 0.5036504681714455,
"grad_norm": 0.00034386530751362443,
"learning_rate": 2.5555606880150712e-05,
"loss": 0.0,
"step": 315500
},
{
"epoch": 0.5044486464094351,
"grad_norm": 0.0007472603465430439,
"learning_rate": 2.554223130822672e-05,
"loss": 0.0,
"step": 316000
},
{
"epoch": 0.5052468246474248,
"grad_norm": 4685.83837890625,
"learning_rate": 2.5528839151152466e-05,
"loss": 0.0,
"step": 316500
},
{
"epoch": 0.5060450028854143,
"grad_norm": 0.0013435595901682973,
"learning_rate": 2.5515430429996633e-05,
"loss": 0.0,
"step": 317000
},
{
"epoch": 0.506843181123404,
"grad_norm": 0.00042831370956264436,
"learning_rate": 2.550200516585396e-05,
"loss": 0.0,
"step": 317500
},
{
"epoch": 0.5076413593613935,
"grad_norm": 0.00022195317433215678,
"learning_rate": 2.548856337984522e-05,
"loss": 0.0,
"step": 318000
},
{
"epoch": 0.5084395375993832,
"grad_norm": 0.0003667280252557248,
"learning_rate": 2.5475105093117168e-05,
"loss": 0.0,
"step": 318500
},
{
"epoch": 0.5092377158373728,
"grad_norm": 0.0003962449845857918,
"learning_rate": 2.546163032684253e-05,
"loss": 0.0,
"step": 319000
},
{
"epoch": 0.5100358940753624,
"grad_norm": 0.00021383292914833874,
"learning_rate": 2.544813910221994e-05,
"loss": 0.0,
"step": 319500
},
{
"epoch": 0.510834072313352,
"grad_norm": 0.00024260817735921592,
"learning_rate": 2.5434631440473945e-05,
"loss": 0.0,
"step": 320000
},
{
"epoch": 0.5116322505513416,
"grad_norm": 0.00034822686575353146,
"learning_rate": 2.5421107362854944e-05,
"loss": 0.0,
"step": 320500
},
{
"epoch": 0.5124304287893312,
"grad_norm": 0.8575116991996765,
"learning_rate": 2.5407566890639156e-05,
"loss": 0.0,
"step": 321000
},
{
"epoch": 0.5132286070273209,
"grad_norm": 0.17129367589950562,
"learning_rate": 2.5394010045128596e-05,
"loss": 0.0,
"step": 321500
},
{
"epoch": 0.5140267852653104,
"grad_norm": 0.000474643602501601,
"learning_rate": 2.5380436847651038e-05,
"loss": 0.0,
"step": 322000
},
{
"epoch": 0.5148249635033001,
"grad_norm": 0.00037549270200543106,
"learning_rate": 2.5366847319559975e-05,
"loss": 0.0,
"step": 322500
},
{
"epoch": 0.5156231417412896,
"grad_norm": 0.0004087206325493753,
"learning_rate": 2.5353241482234605e-05,
"loss": 0.0,
"step": 323000
},
{
"epoch": 0.5164213199792793,
"grad_norm": 0.11741995066404343,
"learning_rate": 2.5339619357079772e-05,
"loss": 0.0,
"step": 323500
},
{
"epoch": 0.5172194982172689,
"grad_norm": 0.00035057743662036955,
"learning_rate": 2.5325980965525945e-05,
"loss": 0.0,
"step": 324000
},
{
"epoch": 0.5180176764552585,
"grad_norm": 14.589872360229492,
"learning_rate": 2.5312326329029192e-05,
"loss": 0.0,
"step": 324500
},
{
"epoch": 0.5188158546932481,
"grad_norm": 0.00046064663911238313,
"learning_rate": 2.5298655469071128e-05,
"loss": 0.0,
"step": 325000
},
{
"epoch": 0.5196140329312378,
"grad_norm": 0.0005871613975614309,
"learning_rate": 2.5284968407158904e-05,
"loss": 0.0,
"step": 325500
},
{
"epoch": 0.5204122111692273,
"grad_norm": 0.00022222854022402316,
"learning_rate": 2.5271265164825135e-05,
"loss": 0.0,
"step": 326000
},
{
"epoch": 0.521210389407217,
"grad_norm": 0.0003983532660640776,
"learning_rate": 2.525754576362792e-05,
"loss": 0.0,
"step": 326500
},
{
"epoch": 0.5220085676452065,
"grad_norm": 0.0004353003459982574,
"learning_rate": 2.5243810225150764e-05,
"loss": 0.0,
"step": 327000
},
{
"epoch": 0.5228067458831962,
"grad_norm": 0.0003073965781368315,
"learning_rate": 2.523005857100256e-05,
"loss": 0.0,
"step": 327500
},
{
"epoch": 0.5236049241211858,
"grad_norm": 0.0001647748431423679,
"learning_rate": 2.5216290822817556e-05,
"loss": 0.0,
"step": 328000
},
{
"epoch": 0.5244031023591754,
"grad_norm": 0.00037706273724325,
"learning_rate": 2.520250700225532e-05,
"loss": 0.0,
"step": 328500
},
{
"epoch": 0.525201280597165,
"grad_norm": 0.00029242149321362376,
"learning_rate": 2.5188707131000714e-05,
"loss": 0.0,
"step": 329000
},
{
"epoch": 0.5259994588351546,
"grad_norm": 0.00018549045489635319,
"learning_rate": 2.5174891230763827e-05,
"loss": 0.0,
"step": 329500
},
{
"epoch": 0.5267976370731443,
"grad_norm": 0.00017411461158189923,
"learning_rate": 2.516105932327999e-05,
"loss": 0.0,
"step": 330000
},
{
"epoch": 0.5275958153111339,
"grad_norm": 0.0004608782473951578,
"learning_rate": 2.5147211430309704e-05,
"loss": 0.0,
"step": 330500
},
{
"epoch": 0.5283939935491235,
"grad_norm": 0.0007079532369971275,
"learning_rate": 2.5133347573638617e-05,
"loss": 0.0,
"step": 331000
},
{
"epoch": 0.5291921717871131,
"grad_norm": 0.0004867357783950865,
"learning_rate": 2.51194677750775e-05,
"loss": 0.0,
"step": 331500
},
{
"epoch": 0.5299903500251028,
"grad_norm": 0.0003103635390289128,
"learning_rate": 2.5105572056462206e-05,
"loss": 0.0,
"step": 332000
},
{
"epoch": 0.5307885282630923,
"grad_norm": 0.00025766075123101473,
"learning_rate": 2.5091660439653613e-05,
"loss": 0.0,
"step": 332500
},
{
"epoch": 0.531586706501082,
"grad_norm": 0.0006120207253843546,
"learning_rate": 2.5077732946537638e-05,
"loss": 0.0,
"step": 333000
},
{
"epoch": 0.5323848847390715,
"grad_norm": 0.0005194434197619557,
"learning_rate": 2.5063789599025148e-05,
"loss": 0.0,
"step": 333500
},
{
"epoch": 0.5331830629770612,
"grad_norm": 0.00029437083867378533,
"learning_rate": 2.5049830419051977e-05,
"loss": 0.0,
"step": 334000
},
{
"epoch": 0.5339812412150508,
"grad_norm": 0.00030525890178978443,
"learning_rate": 2.503585542857885e-05,
"loss": 0.0,
"step": 334500
},
{
"epoch": 0.5347794194530404,
"grad_norm": 0.00021708759595640004,
"learning_rate": 2.5021864649591373e-05,
"loss": 0.0,
"step": 335000
},
{
"epoch": 0.53557759769103,
"grad_norm": 0.0003332449123263359,
"learning_rate": 2.500785810409998e-05,
"loss": 0.0,
"step": 335500
},
{
"epoch": 0.5363757759290196,
"grad_norm": 0.0004120411758776754,
"learning_rate": 2.4993835814139924e-05,
"loss": 0.0,
"step": 336000
},
{
"epoch": 0.5371739541670092,
"grad_norm": 1294.555419921875,
"learning_rate": 2.497979780177122e-05,
"loss": 0.0,
"step": 336500
},
{
"epoch": 0.5379721324049989,
"grad_norm": 0.00044994373456574976,
"learning_rate": 2.496574408907862e-05,
"loss": 0.0,
"step": 337000
},
{
"epoch": 0.5387703106429884,
"grad_norm": 0.0004527137498371303,
"learning_rate": 2.4951674698171568e-05,
"loss": 0.0,
"step": 337500
},
{
"epoch": 0.5395684888809781,
"grad_norm": 0.00039052587817423046,
"learning_rate": 2.493758965118419e-05,
"loss": 0.0,
"step": 338000
},
{
"epoch": 0.5403666671189676,
"grad_norm": 0.003807082772254944,
"learning_rate": 2.4923488970275225e-05,
"loss": 0.0,
"step": 338500
},
{
"epoch": 0.5411648453569573,
"grad_norm": 0.000191923973034136,
"learning_rate": 2.4909372677628007e-05,
"loss": 0.0,
"step": 339000
},
{
"epoch": 0.5419630235949469,
"grad_norm": 0.0007527422276325524,
"learning_rate": 2.489524079545044e-05,
"loss": 0.0,
"step": 339500
},
{
"epoch": 0.5427612018329365,
"grad_norm": 0.0001783394836820662,
"learning_rate": 2.488109334597496e-05,
"loss": 0.0,
"step": 340000
},
{
"epoch": 0.5435593800709261,
"grad_norm": 0.0002899257524404675,
"learning_rate": 2.4866930351458482e-05,
"loss": 0.0,
"step": 340500
},
{
"epoch": 0.5443575583089157,
"grad_norm": 0.00021108388318680227,
"learning_rate": 2.4852751834182376e-05,
"loss": 0.0,
"step": 341000
},
{
"epoch": 0.5451557365469053,
"grad_norm": 0.0003050707746297121,
"learning_rate": 2.4838557816452438e-05,
"loss": 0.0,
"step": 341500
},
{
"epoch": 0.545953914784895,
"grad_norm": 0.0005089346086606383,
"learning_rate": 2.482434832059885e-05,
"loss": 0.0,
"step": 342000
},
{
"epoch": 0.5467520930228845,
"grad_norm": 0.0003350640181452036,
"learning_rate": 2.481012336897613e-05,
"loss": 0.0,
"step": 342500
},
{
"epoch": 0.5475502712608742,
"grad_norm": 0.00018076538981404155,
"learning_rate": 2.4795882983963133e-05,
"loss": 0.0,
"step": 343000
},
{
"epoch": 0.5483484494988637,
"grad_norm": 0.00013365145423449576,
"learning_rate": 2.4781627187962988e-05,
"loss": 0.0,
"step": 343500
},
{
"epoch": 0.5491466277368534,
"grad_norm": 0.00038794297142885625,
"learning_rate": 2.4767356003403056e-05,
"loss": 0.0,
"step": 344000
},
{
"epoch": 0.549944805974843,
"grad_norm": 0.0001909395505208522,
"learning_rate": 2.4753069452734923e-05,
"loss": 0.0,
"step": 344500
},
{
"epoch": 0.5507429842128326,
"grad_norm": 0.0001607197045814246,
"learning_rate": 2.4738767558434332e-05,
"loss": 0.0,
"step": 345000
},
{
"epoch": 0.5515411624508223,
"grad_norm": 0.00026962198899127543,
"learning_rate": 2.4724450343001184e-05,
"loss": 0.0,
"step": 345500
},
{
"epoch": 0.5523393406888119,
"grad_norm": 0.00013342987222131342,
"learning_rate": 2.4710117828959472e-05,
"loss": 0.0,
"step": 346000
},
{
"epoch": 0.5531375189268015,
"grad_norm": 0.00031549722189083695,
"learning_rate": 2.469577003885726e-05,
"loss": 0.0,
"step": 346500
},
{
"epoch": 0.5539356971647911,
"grad_norm": 2732.885498046875,
"learning_rate": 2.468140699526664e-05,
"loss": 0.0,
"step": 347000
},
{
"epoch": 0.5547338754027807,
"grad_norm": 0.03750293329358101,
"learning_rate": 2.4667028720783712e-05,
"loss": 0.0,
"step": 347500
},
{
"epoch": 0.5555320536407703,
"grad_norm": 0.0010069627314805984,
"learning_rate": 2.465263523802853e-05,
"loss": 0.0,
"step": 348000
},
{
"epoch": 0.55633023187876,
"grad_norm": 0.00019862744375132024,
"learning_rate": 2.463822656964506e-05,
"loss": 0.0,
"step": 348500
},
{
"epoch": 0.5571284101167495,
"grad_norm": 0.0008133440860547125,
"learning_rate": 2.4623802738301183e-05,
"loss": 0.0,
"step": 349000
},
{
"epoch": 0.5579265883547392,
"grad_norm": 0.00027964115724898875,
"learning_rate": 2.4609363766688627e-05,
"loss": 0.0,
"step": 349500
},
{
"epoch": 0.5587247665927287,
"grad_norm": 0.0001666530006332323,
"learning_rate": 2.4594909677522934e-05,
"loss": 0.0,
"step": 350000
},
{
"epoch": 0.5595229448307184,
"grad_norm": 0.00025385370827279985,
"learning_rate": 2.458044049354342e-05,
"loss": 0.0,
"step": 350500
},
{
"epoch": 0.560321123068708,
"grad_norm": 0.00013368998770602047,
"learning_rate": 2.4565956237513173e-05,
"loss": 0.0,
"step": 351000
},
{
"epoch": 0.5611193013066976,
"grad_norm": 0.4133665859699249,
"learning_rate": 2.4551456932218966e-05,
"loss": 0.0,
"step": 351500
},
{
"epoch": 0.5619174795446872,
"grad_norm": 0.14180496335029602,
"learning_rate": 2.453694260047127e-05,
"loss": 0.0,
"step": 352000
},
{
"epoch": 0.5627156577826768,
"grad_norm": 0.00047657452523708344,
"learning_rate": 2.4522413265104182e-05,
"loss": 0.0,
"step": 352500
},
{
"epoch": 0.5635138360206664,
"grad_norm": 0.0002391609741607681,
"learning_rate": 2.4507868948975404e-05,
"loss": 0.0,
"step": 353000
},
{
"epoch": 0.5643120142586561,
"grad_norm": 0.0003856563416775316,
"learning_rate": 2.449330967496621e-05,
"loss": 0.0,
"step": 353500
},
{
"epoch": 0.5651101924966456,
"grad_norm": 0.004248655401170254,
"learning_rate": 2.4478735465981412e-05,
"loss": 0.0,
"step": 354000
},
{
"epoch": 0.5659083707346353,
"grad_norm": 0.028849566355347633,
"learning_rate": 2.4464146344949303e-05,
"loss": 0.0,
"step": 354500
},
{
"epoch": 0.5667065489726248,
"grad_norm": 0.0003297879302408546,
"learning_rate": 2.444954233482164e-05,
"loss": 0.0,
"step": 355000
},
{
"epoch": 0.5675047272106145,
"grad_norm": 0.0020440176595002413,
"learning_rate": 2.4434923458573617e-05,
"loss": 0.0,
"step": 355500
},
{
"epoch": 0.5683029054486041,
"grad_norm": 0.0003888048813678324,
"learning_rate": 2.442028973920379e-05,
"loss": 0.0,
"step": 356000
},
{
"epoch": 0.5691010836865937,
"grad_norm": 0.0005131899379193783,
"learning_rate": 2.44056411997341e-05,
"loss": 0.0,
"step": 356500
},
{
"epoch": 0.5698992619245833,
"grad_norm": 0.0008440042147412896,
"learning_rate": 2.4390977863209777e-05,
"loss": 0.0,
"step": 357000
},
{
"epoch": 0.570697440162573,
"grad_norm": 0.0008664605556987226,
"learning_rate": 2.437629975269933e-05,
"loss": 0.0,
"step": 357500
},
{
"epoch": 0.5714956184005625,
"grad_norm": 0.0005484423600137234,
"learning_rate": 2.4361606891294532e-05,
"loss": 0.0,
"step": 358000
},
{
"epoch": 0.5722937966385522,
"grad_norm": 0.0016136858612298965,
"learning_rate": 2.4346899302110336e-05,
"loss": 0.0,
"step": 358500
},
{
"epoch": 0.5730919748765417,
"grad_norm": 0.0021964486222714186,
"learning_rate": 2.4332177008284888e-05,
"loss": 0.0,
"step": 359000
},
{
"epoch": 0.5738901531145314,
"grad_norm": 0.0003874083631671965,
"learning_rate": 2.4317440032979446e-05,
"loss": 0.0,
"step": 359500
},
{
"epoch": 0.574688331352521,
"grad_norm": 0.0002162757737096399,
"learning_rate": 2.430268839937839e-05,
"loss": 0.0,
"step": 360000
},
{
"epoch": 0.5754865095905106,
"grad_norm": 0.0002551154757384211,
"learning_rate": 2.428792213068914e-05,
"loss": 0.0,
"step": 360500
},
{
"epoch": 0.5762846878285003,
"grad_norm": 0.00028797570848837495,
"learning_rate": 2.427314125014214e-05,
"loss": 0.0,
"step": 361000
},
{
"epoch": 0.5770828660664898,
"grad_norm": 0.0003449781215749681,
"learning_rate": 2.4258345780990833e-05,
"loss": 0.0,
"step": 361500
},
{
"epoch": 0.5778810443044795,
"grad_norm": 0.0002227453514933586,
"learning_rate": 2.4243535746511615e-05,
"loss": 0.0,
"step": 362000
},
{
"epoch": 0.5786792225424691,
"grad_norm": 0.0005646930076181889,
"learning_rate": 2.4228711170003782e-05,
"loss": 0.0,
"step": 362500
},
{
"epoch": 0.5794774007804587,
"grad_norm": 0.00017032682080753148,
"learning_rate": 2.4213872074789518e-05,
"loss": 0.0,
"step": 363000
},
{
"epoch": 0.5802755790184483,
"grad_norm": 0.00020792830036953092,
"learning_rate": 2.4199018484213844e-05,
"loss": 0.0,
"step": 363500
},
{
"epoch": 0.581073757256438,
"grad_norm": 0.00023471614986192435,
"learning_rate": 2.4184150421644586e-05,
"loss": 0.0,
"step": 364000
},
{
"epoch": 0.5818719354944275,
"grad_norm": 0.0009064020705409348,
"learning_rate": 2.4169267910472336e-05,
"loss": 0.0,
"step": 364500
},
{
"epoch": 0.5826701137324172,
"grad_norm": 0.00018221262143924832,
"learning_rate": 2.4154370974110425e-05,
"loss": 0.0,
"step": 365000
},
{
"epoch": 0.5834682919704067,
"grad_norm": 0.2526322305202484,
"learning_rate": 2.4139459635994864e-05,
"loss": 0.0,
"step": 365500
},
{
"epoch": 0.5842664702083964,
"grad_norm": 64.04448699951172,
"learning_rate": 2.412453391958434e-05,
"loss": 0.0,
"step": 366000
},
{
"epoch": 0.585064648446386,
"grad_norm": 0.0006586963427253067,
"learning_rate": 2.4109593848360137e-05,
"loss": 0.0,
"step": 366500
},
{
"epoch": 0.5858628266843756,
"grad_norm": 0.003046433674171567,
"learning_rate": 2.4094639445826134e-05,
"loss": 0.0,
"step": 367000
},
{
"epoch": 0.5866610049223652,
"grad_norm": 0.0009702751412987709,
"learning_rate": 2.4079670735508765e-05,
"loss": 0.0,
"step": 367500
},
{
"epoch": 0.5874591831603548,
"grad_norm": 0.0007255422533489764,
"learning_rate": 2.4064687740956956e-05,
"loss": 0.0,
"step": 368000
},
{
"epoch": 0.5882573613983444,
"grad_norm": 0.000522131216712296,
"learning_rate": 2.4049690485742116e-05,
"loss": 0.0,
"step": 368500
},
{
"epoch": 0.5890555396363341,
"grad_norm": 0.0017868474824354053,
"learning_rate": 2.4034678993458088e-05,
"loss": 0.0,
"step": 369000
},
{
"epoch": 0.5898537178743236,
"grad_norm": 0.002390818204730749,
"learning_rate": 2.4019653287721105e-05,
"loss": 0.0,
"step": 369500
},
{
"epoch": 0.5906518961123133,
"grad_norm": 0.0002911436022259295,
"learning_rate": 2.400461339216978e-05,
"loss": 0.0,
"step": 370000
},
{
"epoch": 0.5914500743503028,
"grad_norm": 0.0005996805848553777,
"learning_rate": 2.3989559330465018e-05,
"loss": 0.0,
"step": 370500
},
{
"epoch": 0.5922482525882925,
"grad_norm": 0.0002952713402919471,
"learning_rate": 2.3974491126290042e-05,
"loss": 0.0,
"step": 371000
},
{
"epoch": 0.5930464308262821,
"grad_norm": 0.0002812375605572015,
"learning_rate": 2.3959408803350304e-05,
"loss": 0.0,
"step": 371500
},
{
"epoch": 0.5938446090642717,
"grad_norm": 0.0002432354522170499,
"learning_rate": 2.3944312385373475e-05,
"loss": 0.0,
"step": 372000
},
{
"epoch": 0.5946427873022613,
"grad_norm": 0.002821897389367223,
"learning_rate": 2.392920189610941e-05,
"loss": 0.0,
"step": 372500
},
{
"epoch": 0.5954409655402509,
"grad_norm": 0.003000877797603607,
"learning_rate": 2.3914077359330088e-05,
"loss": 0.0,
"step": 373000
},
{
"epoch": 0.5962391437782405,
"grad_norm": 0.000387115083867684,
"learning_rate": 2.3898938798829576e-05,
"loss": 0.0,
"step": 373500
},
{
"epoch": 0.5970373220162302,
"grad_norm": 0.0008274815627373755,
"learning_rate": 2.3883786238424035e-05,
"loss": 0.0,
"step": 374000
},
{
"epoch": 0.5978355002542197,
"grad_norm": 0.00043856215779669583,
"learning_rate": 2.3868619701951625e-05,
"loss": 0.0,
"step": 374500
},
{
"epoch": 0.5986336784922094,
"grad_norm": 0.000386549363611266,
"learning_rate": 2.3853439213272506e-05,
"loss": 0.0,
"step": 375000
},
{
"epoch": 0.5994318567301989,
"grad_norm": 0.0003421735018491745,
"learning_rate": 2.383824479626878e-05,
"loss": 0.0,
"step": 375500
},
{
"epoch": 0.6002300349681886,
"grad_norm": 0.00029982542037032545,
"learning_rate": 2.382303647484448e-05,
"loss": 0.0,
"step": 376000
},
{
"epoch": 0.6010282132061782,
"grad_norm": 0.0012008086778223515,
"learning_rate": 2.3807814272925475e-05,
"loss": 0.0,
"step": 376500
},
{
"epoch": 0.6018263914441678,
"grad_norm": 84.65890502929688,
"learning_rate": 2.3792578214459513e-05,
"loss": 0.0,
"step": 377000
},
{
"epoch": 0.6026245696821575,
"grad_norm": 0.0005028890445828438,
"learning_rate": 2.3777328323416116e-05,
"loss": 0.0,
"step": 377500
},
{
"epoch": 0.603422747920147,
"grad_norm": 0.00019518673070706427,
"learning_rate": 2.3762064623786578e-05,
"loss": 0.0,
"step": 378000
},
{
"epoch": 0.6042209261581367,
"grad_norm": 0.0307555440813303,
"learning_rate": 2.3746787139583903e-05,
"loss": 0.0,
"step": 378500
},
{
"epoch": 0.6050191043961263,
"grad_norm": 410.12652587890625,
"learning_rate": 2.3731495894842808e-05,
"loss": 0.0,
"step": 379000
},
{
"epoch": 0.6058172826341159,
"grad_norm": 0.02829531952738762,
"learning_rate": 2.371619091361963e-05,
"loss": 0.0,
"step": 379500
},
{
"epoch": 0.6066154608721055,
"grad_norm": 0.0006308479933068156,
"learning_rate": 2.370087221999233e-05,
"loss": 0.0,
"step": 380000
},
{
"epoch": 0.6074136391100952,
"grad_norm": 0.0002925604931078851,
"learning_rate": 2.3685539838060445e-05,
"loss": 0.0,
"step": 380500
},
{
"epoch": 0.6082118173480847,
"grad_norm": 0.0003732262703124434,
"learning_rate": 2.3670193791945028e-05,
"loss": 0.0,
"step": 381000
},
{
"epoch": 0.6090099955860744,
"grad_norm": 0.0002686434891074896,
"learning_rate": 2.3654834105788658e-05,
"loss": 0.0,
"step": 381500
},
{
"epoch": 0.6098081738240639,
"grad_norm": 0.0002781795628834516,
"learning_rate": 2.363946080375534e-05,
"loss": 0.0,
"step": 382000
},
{
"epoch": 0.6106063520620536,
"grad_norm": 0.003745084395632148,
"learning_rate": 2.3624073910030537e-05,
"loss": 0.0,
"step": 382500
},
{
"epoch": 0.6114045303000432,
"grad_norm": 0.00020644953474402428,
"learning_rate": 2.3608673448821054e-05,
"loss": 0.0,
"step": 383000
},
{
"epoch": 0.6122027085380328,
"grad_norm": 0.0007499917992390692,
"learning_rate": 2.359325944435507e-05,
"loss": 0.0,
"step": 383500
},
{
"epoch": 0.6130008867760224,
"grad_norm": 0.0001678672997513786,
"learning_rate": 2.3577831920882058e-05,
"loss": 0.0,
"step": 384000
},
{
"epoch": 0.613799065014012,
"grad_norm": 0.002954375697299838,
"learning_rate": 2.3562390902672762e-05,
"loss": 0.0,
"step": 384500
},
{
"epoch": 0.6145972432520016,
"grad_norm": 0.00037960358895361423,
"learning_rate": 2.3546936414019152e-05,
"loss": 0.0,
"step": 385000
},
{
"epoch": 0.6153954214899913,
"grad_norm": 0.004529908765107393,
"learning_rate": 2.35314684792344e-05,
"loss": 0.0,
"step": 385500
},
{
"epoch": 0.6161935997279808,
"grad_norm": 0.0032500068191438913,
"learning_rate": 2.3515987122652828e-05,
"loss": 0.0,
"step": 386000
},
{
"epoch": 0.6169917779659705,
"grad_norm": 0.0002675579162314534,
"learning_rate": 2.3500492368629858e-05,
"loss": 0.0,
"step": 386500
},
{
"epoch": 0.61778995620396,
"grad_norm": 0.013961972668766975,
"learning_rate": 2.348498424154201e-05,
"loss": 0.0,
"step": 387000
},
{
"epoch": 0.6185881344419497,
"grad_norm": 0.0020743459463119507,
"learning_rate": 2.3469462765786833e-05,
"loss": 0.0,
"step": 387500
},
{
"epoch": 0.6193863126799393,
"grad_norm": 0.0005027143633924425,
"learning_rate": 2.345392796578288e-05,
"loss": 0.0,
"step": 388000
},
{
"epoch": 0.6201844909179289,
"grad_norm": 0.0003365549782756716,
"learning_rate": 2.343837986596966e-05,
"loss": 0.0,
"step": 388500
},
{
"epoch": 0.6209826691559185,
"grad_norm": 0.0002937042445410043,
"learning_rate": 2.3422818490807615e-05,
"loss": 0.0,
"step": 389000
},
{
"epoch": 0.6217808473939082,
"grad_norm": 0.000486930541228503,
"learning_rate": 2.3407243864778053e-05,
"loss": 0.0,
"step": 389500
},
{
"epoch": 0.6225790256318977,
"grad_norm": 0.00019580399384722114,
"learning_rate": 2.3391656012383152e-05,
"loss": 0.0,
"step": 390000
},
{
"epoch": 0.6233772038698874,
"grad_norm": 0.3469190001487732,
"learning_rate": 2.3376054958145884e-05,
"loss": 0.0,
"step": 390500
},
{
"epoch": 0.6241753821078769,
"grad_norm": 256.0212707519531,
"learning_rate": 2.3360440726609992e-05,
"loss": 0.0,
"step": 391000
},
{
"epoch": 0.6249735603458666,
"grad_norm": 0.0003373539075255394,
"learning_rate": 2.3344813342339952e-05,
"loss": 0.0,
"step": 391500
},
{
"epoch": 0.6257717385838562,
"grad_norm": 0.00019939610501751304,
"learning_rate": 2.332917282992093e-05,
"loss": 0.0,
"step": 392000
},
{
"epoch": 0.6265699168218458,
"grad_norm": 0.00017603930609766394,
"learning_rate": 2.3313519213958745e-05,
"loss": 0.0,
"step": 392500
},
{
"epoch": 0.6273680950598355,
"grad_norm": 0.00020907670841552317,
"learning_rate": 2.3297852519079837e-05,
"loss": 0.0,
"step": 393000
},
{
"epoch": 0.628166273297825,
"grad_norm": 0.0003900097217410803,
"learning_rate": 2.3282172769931213e-05,
"loss": 0.0,
"step": 393500
},
{
"epoch": 0.6289644515358147,
"grad_norm": 3.75453782081604,
"learning_rate": 2.326647999118042e-05,
"loss": 0.0,
"step": 394000
},
{
"epoch": 0.6297626297738043,
"grad_norm": 0.013970088213682175,
"learning_rate": 2.325077420751551e-05,
"loss": 0.0,
"step": 394500
},
{
"epoch": 0.6305608080117939,
"grad_norm": 0.00030075875110924244,
"learning_rate": 2.323505544364498e-05,
"loss": 0.0,
"step": 395000
},
{
"epoch": 0.6313589862497835,
"grad_norm": 0.0005138195701874793,
"learning_rate": 2.321932372429776e-05,
"loss": 0.0,
"step": 395500
},
{
"epoch": 0.6321571644877731,
"grad_norm": 178.2792510986328,
"learning_rate": 2.3203579074223158e-05,
"loss": 0.0001,
"step": 396000
},
{
"epoch": 0.6329553427257627,
"grad_norm": 0.0002129318891093135,
"learning_rate": 2.3187821518190826e-05,
"loss": 0.0,
"step": 396500
},
{
"epoch": 0.6337535209637524,
"grad_norm": 0.08376732468605042,
"learning_rate": 2.317205108099072e-05,
"loss": 0.0,
"step": 397000
},
{
"epoch": 0.6345516992017419,
"grad_norm": 0.0006943497573956847,
"learning_rate": 2.3156267787433056e-05,
"loss": 0.0,
"step": 397500
},
{
"epoch": 0.6353498774397316,
"grad_norm": 0.0002545543829910457,
"learning_rate": 2.3140471662348283e-05,
"loss": 0.0,
"step": 398000
},
{
"epoch": 0.6361480556777211,
"grad_norm": 0.0022383469622582197,
"learning_rate": 2.3124662730587027e-05,
"loss": 0.0,
"step": 398500
},
{
"epoch": 0.6369462339157108,
"grad_norm": 0.00022144192189443856,
"learning_rate": 2.310884101702007e-05,
"loss": 0.0,
"step": 399000
},
{
"epoch": 0.6377444121537004,
"grad_norm": 0.01328088715672493,
"learning_rate": 2.30930065465383e-05,
"loss": 0.0,
"step": 399500
},
{
"epoch": 0.63854259039169,
"grad_norm": 0.00027457988471724093,
"learning_rate": 2.3077159344052675e-05,
"loss": 0.0,
"step": 400000
},
{
"epoch": 0.6393407686296796,
"grad_norm": 0.00017897885118145496,
"learning_rate": 2.306129943449418e-05,
"loss": 0.0,
"step": 400500
},
{
"epoch": 0.6401389468676693,
"grad_norm": 0.00017138413386419415,
"learning_rate": 2.3045426842813797e-05,
"loss": 0.0,
"step": 401000
},
{
"epoch": 0.6409371251056588,
"grad_norm": 0.00025630032178014517,
"learning_rate": 2.3029541593982453e-05,
"loss": 0.0,
"step": 401500
},
{
"epoch": 0.6417353033436485,
"grad_norm": 0.0002913358330260962,
"learning_rate": 2.3013643712990987e-05,
"loss": 0.0,
"step": 402000
},
{
"epoch": 0.642533481581638,
"grad_norm": 0.00017182863666675985,
"learning_rate": 2.2997733224850126e-05,
"loss": 0.0,
"step": 402500
},
{
"epoch": 0.6433316598196277,
"grad_norm": 0.0005597301642410457,
"learning_rate": 2.2981810154590402e-05,
"loss": 0.0,
"step": 403000
},
{
"epoch": 0.6441298380576173,
"grad_norm": 0.0002503639261703938,
"learning_rate": 2.2965874527262172e-05,
"loss": 0.0,
"step": 403500
},
{
"epoch": 0.6449280162956069,
"grad_norm": 0.0002138703566743061,
"learning_rate": 2.2949926367935527e-05,
"loss": 0.0,
"step": 404000
},
{
"epoch": 0.6457261945335965,
"grad_norm": 0.0001715560065349564,
"learning_rate": 2.2933965701700286e-05,
"loss": 0.0,
"step": 404500
},
{
"epoch": 0.6465243727715861,
"grad_norm": 0.00013465856318362057,
"learning_rate": 2.2917992553665937e-05,
"loss": 0.0,
"step": 405000
},
{
"epoch": 0.6473225510095757,
"grad_norm": 0.0001602515549166128,
"learning_rate": 2.2902006948961597e-05,
"loss": 0.0,
"step": 405500
},
{
"epoch": 0.6481207292475654,
"grad_norm": 0.00034399403375573456,
"learning_rate": 2.2886008912736e-05,
"loss": 0.0,
"step": 406000
},
{
"epoch": 0.6489189074855549,
"grad_norm": 0.00011978346446994692,
"learning_rate": 2.286999847015743e-05,
"loss": 0.0,
"step": 406500
},
{
"epoch": 0.6497170857235446,
"grad_norm": 0.00019513712322805077,
"learning_rate": 2.2853975646413668e-05,
"loss": 0.0,
"step": 407000
},
{
"epoch": 0.6505152639615341,
"grad_norm": 0.001562693272717297,
"learning_rate": 2.2837940466712003e-05,
"loss": 0.0,
"step": 407500
},
{
"epoch": 0.6513134421995238,
"grad_norm": 0.0001763744803611189,
"learning_rate": 2.2821892956279154e-05,
"loss": 0.0,
"step": 408000
},
{
"epoch": 0.6521116204375135,
"grad_norm": 0.00090842938516289,
"learning_rate": 2.2805833140361228e-05,
"loss": 0.0,
"step": 408500
},
{
"epoch": 0.652909798675503,
"grad_norm": 0.003621726995334029,
"learning_rate": 2.2789761044223695e-05,
"loss": 0.0,
"step": 409000
},
{
"epoch": 0.6537079769134927,
"grad_norm": 0.00015422285650856793,
"learning_rate": 2.2773676693151353e-05,
"loss": 0.0,
"step": 409500
},
{
"epoch": 0.6545061551514822,
"grad_norm": 0.0012531452812254429,
"learning_rate": 2.275758011244827e-05,
"loss": 0.0,
"step": 410000
},
{
"epoch": 0.6553043333894719,
"grad_norm": 0.0006565919611603022,
"learning_rate": 2.2741471327437767e-05,
"loss": 0.0,
"step": 410500
},
{
"epoch": 0.6561025116274615,
"grad_norm": 0.00330674322322011,
"learning_rate": 2.2725350363462343e-05,
"loss": 0.0,
"step": 411000
},
{
"epoch": 0.6569006898654511,
"grad_norm": 0.0005322208162397146,
"learning_rate": 2.270921724588368e-05,
"loss": 0.0,
"step": 411500
},
{
"epoch": 0.6576988681034407,
"grad_norm": 0.00018437649123370647,
"learning_rate": 2.269307200008256e-05,
"loss": 0.0,
"step": 412000
},
{
"epoch": 0.6584970463414304,
"grad_norm": 0.00015154003631323576,
"learning_rate": 2.267691465145886e-05,
"loss": 0.0,
"step": 412500
},
{
"epoch": 0.6592952245794199,
"grad_norm": 0.00023861938097979873,
"learning_rate": 2.2660745225431494e-05,
"loss": 0.0,
"step": 413000
},
{
"epoch": 0.6600934028174096,
"grad_norm": 0.0001982437534024939,
"learning_rate": 2.2644563747438375e-05,
"loss": 0.0,
"step": 413500
},
{
"epoch": 0.6608915810553991,
"grad_norm": 0.00024549709632992744,
"learning_rate": 2.2628370242936377e-05,
"loss": 0.0,
"step": 414000
},
{
"epoch": 0.6616897592933888,
"grad_norm": 0.000128219326143153,
"learning_rate": 2.2612164737401288e-05,
"loss": 0.0,
"step": 414500
},
{
"epoch": 0.6624879375313784,
"grad_norm": 0.00017211749218404293,
"learning_rate": 2.2595947256327786e-05,
"loss": 0.0,
"step": 415000
},
{
"epoch": 0.663286115769368,
"grad_norm": 0.0005725378287024796,
"learning_rate": 2.2579717825229384e-05,
"loss": 0.0,
"step": 415500
},
{
"epoch": 0.6640842940073576,
"grad_norm": 0.002562319627031684,
"learning_rate": 2.256347646963839e-05,
"loss": 0.0,
"step": 416000
},
{
"epoch": 0.6648824722453472,
"grad_norm": 0.0054826377891004086,
"learning_rate": 2.254722321510588e-05,
"loss": 0.0,
"step": 416500
},
{
"epoch": 0.6656806504833368,
"grad_norm": 0.04357144236564636,
"learning_rate": 2.2530958087201656e-05,
"loss": 0.0,
"step": 417000
},
{
"epoch": 0.6664788287213265,
"grad_norm": 0.002114373492076993,
"learning_rate": 2.251468111151418e-05,
"loss": 0.0,
"step": 417500
},
{
"epoch": 0.667277006959316,
"grad_norm": 0.0005999524146318436,
"learning_rate": 2.249839231365056e-05,
"loss": 0.0,
"step": 418000
},
{
"epoch": 0.6680751851973057,
"grad_norm": 0.0004485425597522408,
"learning_rate": 2.2482091719236514e-05,
"loss": 0.0,
"step": 418500
},
{
"epoch": 0.6688733634352952,
"grad_norm": 0.00020038214279338717,
"learning_rate": 2.2465779353916305e-05,
"loss": 0.0,
"step": 419000
},
{
"epoch": 0.6696715416732849,
"grad_norm": 0.00029285537311807275,
"learning_rate": 2.2449455243352724e-05,
"loss": 0.0,
"step": 419500
},
{
"epoch": 0.6704697199112745,
"grad_norm": 0.0003520438331179321,
"learning_rate": 2.243311941322703e-05,
"loss": 0.0,
"step": 420000
},
{
"epoch": 0.6712678981492641,
"grad_norm": 0.0002213429397670552,
"learning_rate": 2.2416771889238928e-05,
"loss": 0.0,
"step": 420500
},
{
"epoch": 0.6720660763872537,
"grad_norm": 0.00015232243458740413,
"learning_rate": 2.240041269710652e-05,
"loss": 0.0,
"step": 421000
},
{
"epoch": 0.6728642546252434,
"grad_norm": 0.0004849177203141153,
"learning_rate": 2.2384041862566254e-05,
"loss": 0.0,
"step": 421500
},
{
"epoch": 0.6736624328632329,
"grad_norm": 0.00021231337450444698,
"learning_rate": 2.23676594113729e-05,
"loss": 0.0,
"step": 422000
},
{
"epoch": 0.6744606111012226,
"grad_norm": 0.0001643193099880591,
"learning_rate": 2.235126536929951e-05,
"loss": 0.0,
"step": 422500
},
{
"epoch": 0.6752587893392121,
"grad_norm": 0.00012516920105554163,
"learning_rate": 2.2334859762137362e-05,
"loss": 0.0,
"step": 423000
},
{
"epoch": 0.6760569675772018,
"grad_norm": 0.0005767050897702575,
"learning_rate": 2.231844261569593e-05,
"loss": 0.0,
"step": 423500
},
{
"epoch": 0.6768551458151915,
"grad_norm": 0.00015193522267509252,
"learning_rate": 2.2302013955802847e-05,
"loss": 0.0,
"step": 424000
},
{
"epoch": 0.677653324053181,
"grad_norm": 0.00024528297944925725,
"learning_rate": 2.228557380830385e-05,
"loss": 0.0,
"step": 424500
},
{
"epoch": 0.6784515022911707,
"grad_norm": 0.00018646153330337256,
"learning_rate": 2.226912219906276e-05,
"loss": 0.0,
"step": 425000
},
{
"epoch": 0.6792496805291602,
"grad_norm": 0.00017168234626296908,
"learning_rate": 2.225265915396142e-05,
"loss": 0.0,
"step": 425500
},
{
"epoch": 0.6800478587671499,
"grad_norm": 0.00011930393520742655,
"learning_rate": 2.2236184698899667e-05,
"loss": 0.0,
"step": 426000
},
{
"epoch": 0.6808460370051395,
"grad_norm": 0.0001393697311868891,
"learning_rate": 2.2219698859795292e-05,
"loss": 0.0,
"step": 426500
},
{
"epoch": 0.6816442152431291,
"grad_norm": 0.00014085869770497084,
"learning_rate": 2.220320166258399e-05,
"loss": 0.0,
"step": 427000
},
{
"epoch": 0.6824423934811187,
"grad_norm": 0.00041550418245606124,
"learning_rate": 2.2186693133219322e-05,
"loss": 0.0,
"step": 427500
},
{
"epoch": 0.6832405717191083,
"grad_norm": 0.00016101829532999545,
"learning_rate": 2.217017329767269e-05,
"loss": 0.0,
"step": 428000
},
{
"epoch": 0.6840387499570979,
"grad_norm": 2.820543050765991,
"learning_rate": 2.2153642181933264e-05,
"loss": 0.0,
"step": 428500
},
{
"epoch": 0.6848369281950876,
"grad_norm": 0.00014117424143478274,
"learning_rate": 2.213709981200798e-05,
"loss": 0.0,
"step": 429000
},
{
"epoch": 0.6856351064330771,
"grad_norm": 0.0002793257881421596,
"learning_rate": 2.2120546213921473e-05,
"loss": 0.0,
"step": 429500
},
{
"epoch": 0.6864332846710668,
"grad_norm": 0.0001950536243384704,
"learning_rate": 2.2103981413716033e-05,
"loss": 0.0,
"step": 430000
},
{
"epoch": 0.6872314629090563,
"grad_norm": 0.0003645491087809205,
"learning_rate": 2.2087405437451577e-05,
"loss": 0.0,
"step": 430500
},
{
"epoch": 0.688029641147046,
"grad_norm": 0.00022027833620086312,
"learning_rate": 2.2070818311205615e-05,
"loss": 0.0,
"step": 431000
},
{
"epoch": 0.6888278193850356,
"grad_norm": 0.001554289017803967,
"learning_rate": 2.205422006107318e-05,
"loss": 0.0,
"step": 431500
},
{
"epoch": 0.6896259976230252,
"grad_norm": 0.00027029041666537523,
"learning_rate": 2.2037610713166828e-05,
"loss": 0.0,
"step": 432000
},
{
"epoch": 0.6904241758610148,
"grad_norm": 0.00016059460176620632,
"learning_rate": 2.202099029361655e-05,
"loss": 0.0,
"step": 432500
},
{
"epoch": 0.6912223540990045,
"grad_norm": 0.0009365231380797923,
"learning_rate": 2.2004358828569774e-05,
"loss": 0.0,
"step": 433000
},
{
"epoch": 0.692020532336994,
"grad_norm": 0.0001574302586959675,
"learning_rate": 2.1987716344191296e-05,
"loss": 0.0,
"step": 433500
},
{
"epoch": 0.6928187105749837,
"grad_norm": 0.00016089307609945536,
"learning_rate": 2.197106286666324e-05,
"loss": 0.0,
"step": 434000
},
{
"epoch": 0.6936168888129732,
"grad_norm": 0.0001953535247594118,
"learning_rate": 2.1954398422185052e-05,
"loss": 0.0,
"step": 434500
},
{
"epoch": 0.6944150670509629,
"grad_norm": 0.0003318938543088734,
"learning_rate": 2.1937723036973396e-05,
"loss": 0.0,
"step": 435000
},
{
"epoch": 0.6952132452889525,
"grad_norm": 0.0007233397336676717,
"learning_rate": 2.1921036737262177e-05,
"loss": 0.0,
"step": 435500
},
{
"epoch": 0.6960114235269421,
"grad_norm": 8.286305092042312e-05,
"learning_rate": 2.1904339549302448e-05,
"loss": 0.0,
"step": 436000
},
{
"epoch": 0.6968096017649317,
"grad_norm": 7.818207814125344e-05,
"learning_rate": 2.188763149936241e-05,
"loss": 0.0,
"step": 436500
},
{
"epoch": 0.6976077800029213,
"grad_norm": 0.0001744274777593091,
"learning_rate": 2.1870912613727345e-05,
"loss": 0.0,
"step": 437000
},
{
"epoch": 0.6984059582409109,
"grad_norm": 0.0002287587121827528,
"learning_rate": 2.185418291869958e-05,
"loss": 0.0,
"step": 437500
},
{
"epoch": 0.6992041364789006,
"grad_norm": 0.00016285310266539454,
"learning_rate": 2.1837442440598445e-05,
"loss": 0.0,
"step": 438000
},
{
"epoch": 0.7000023147168901,
"grad_norm": 0.0001769603113643825,
"learning_rate": 2.1820691205760242e-05,
"loss": 0.0,
"step": 438500
},
{
"epoch": 0.7008004929548798,
"grad_norm": 0.00014560433919541538,
"learning_rate": 2.1803929240538193e-05,
"loss": 0.0,
"step": 439000
},
{
"epoch": 0.7015986711928694,
"grad_norm": 0.00030399439856410027,
"learning_rate": 2.1787156571302395e-05,
"loss": 0.0,
"step": 439500
},
{
"epoch": 0.702396849430859,
"grad_norm": 0.0008793527958914638,
"learning_rate": 2.177037322443979e-05,
"loss": 0.0,
"step": 440000
},
{
"epoch": 0.7031950276688487,
"grad_norm": 0.00017075143114198,
"learning_rate": 2.1753579226354126e-05,
"loss": 0.0,
"step": 440500
},
{
"epoch": 0.7039932059068382,
"grad_norm": 9.666175174061209e-05,
"learning_rate": 2.1736774603465886e-05,
"loss": 0.0,
"step": 441000
},
{
"epoch": 0.7047913841448279,
"grad_norm": 0.0006323313573375344,
"learning_rate": 2.1719959382212294e-05,
"loss": 0.0,
"step": 441500
},
{
"epoch": 0.7055895623828174,
"grad_norm": 38.205810546875,
"learning_rate": 2.1703133589047222e-05,
"loss": 0.0,
"step": 442000
},
{
"epoch": 0.7063877406208071,
"grad_norm": 0.000583072891458869,
"learning_rate": 2.16862972504412e-05,
"loss": 0.0,
"step": 442500
},
{
"epoch": 0.7071859188587967,
"grad_norm": 0.002959677018225193,
"learning_rate": 2.166945039288132e-05,
"loss": 0.0,
"step": 443000
},
{
"epoch": 0.7079840970967863,
"grad_norm": 0.00010426915105199441,
"learning_rate": 2.165259304287125e-05,
"loss": 0.0,
"step": 443500
},
{
"epoch": 0.7087822753347759,
"grad_norm": 0.0002889011229854077,
"learning_rate": 2.1635725226931143e-05,
"loss": 0.0,
"step": 444000
},
{
"epoch": 0.7095804535727656,
"grad_norm": 0.000344561121892184,
"learning_rate": 2.1618846971597636e-05,
"loss": 0.0,
"step": 444500
},
{
"epoch": 0.7103786318107551,
"grad_norm": 0.0001297101262025535,
"learning_rate": 2.160195830342377e-05,
"loss": 0.0,
"step": 445000
},
{
"epoch": 0.7111768100487448,
"grad_norm": 0.05745575204491615,
"learning_rate": 2.1585059248978978e-05,
"loss": 0.0,
"step": 445500
},
{
"epoch": 0.7119749882867343,
"grad_norm": 0.0002907444431912154,
"learning_rate": 2.156814983484904e-05,
"loss": 0.0,
"step": 446000
},
{
"epoch": 0.712773166524724,
"grad_norm": 0.0002803723618853837,
"learning_rate": 2.1551230087636018e-05,
"loss": 0.0,
"step": 446500
},
{
"epoch": 0.7135713447627136,
"grad_norm": 0.00019375992997083813,
"learning_rate": 2.1534300033958244e-05,
"loss": 0.0,
"step": 447000
},
{
"epoch": 0.7143695230007032,
"grad_norm": 0.000927963585127145,
"learning_rate": 2.151735970045025e-05,
"loss": 0.0,
"step": 447500
},
{
"epoch": 0.7151677012386928,
"grad_norm": 0.00018430485215503722,
"learning_rate": 2.150040911376276e-05,
"loss": 0.0,
"step": 448000
},
{
"epoch": 0.7159658794766824,
"grad_norm": 0.0001356995344394818,
"learning_rate": 2.1483448300562604e-05,
"loss": 0.0,
"step": 448500
},
{
"epoch": 0.716764057714672,
"grad_norm": 0.0004225323209539056,
"learning_rate": 2.1466477287532726e-05,
"loss": 0.0,
"step": 449000
},
{
"epoch": 0.7175622359526617,
"grad_norm": 0.00017753643624018878,
"learning_rate": 2.1449496101372094e-05,
"loss": 0.0,
"step": 449500
},
{
"epoch": 0.7183604141906512,
"grad_norm": 0.0015126933576539159,
"learning_rate": 2.1432504768795695e-05,
"loss": 0.0,
"step": 450000
},
{
"epoch": 0.7183604141906512,
"eval_loss": 2.1175588699406944e-05,
"eval_runtime": 22089.4767,
"eval_samples_per_second": 100.831,
"eval_steps_per_second": 3.151,
"step": 450000
},
{
"epoch": 0.7191585924286409,
"grad_norm": 0.00032660740544088185,
"learning_rate": 2.1415503316534478e-05,
"loss": 0.0,
"step": 450500
},
{
"epoch": 0.7199567706666304,
"grad_norm": 0.0006630943971686065,
"learning_rate": 2.1398491771335297e-05,
"loss": 0.0,
"step": 451000
},
{
"epoch": 0.7207549489046201,
"grad_norm": 0.0002114923991030082,
"learning_rate": 2.1381470159960905e-05,
"loss": 0.0,
"step": 451500
},
{
"epoch": 0.7215531271426097,
"grad_norm": 0.0004499349743127823,
"learning_rate": 2.1364438509189877e-05,
"loss": 0.0,
"step": 452000
},
{
"epoch": 0.7223513053805993,
"grad_norm": 0.0001622430863790214,
"learning_rate": 2.134739684581659e-05,
"loss": 0.0,
"step": 452500
},
{
"epoch": 0.7231494836185889,
"grad_norm": 0.0010711727663874626,
"learning_rate": 2.133034519665117e-05,
"loss": 0.0,
"step": 453000
},
{
"epoch": 0.7239476618565786,
"grad_norm": 0.0011754089500755072,
"learning_rate": 2.1313283588519452e-05,
"loss": 0.0,
"step": 453500
},
{
"epoch": 0.7247458400945681,
"grad_norm": 0.00015875005919951946,
"learning_rate": 2.129621204826294e-05,
"loss": 0.0,
"step": 454000
},
{
"epoch": 0.7255440183325578,
"grad_norm": 291.006103515625,
"learning_rate": 2.127913060273875e-05,
"loss": 0.0,
"step": 454500
},
{
"epoch": 0.7263421965705474,
"grad_norm": 0.0004946871194988489,
"learning_rate": 2.1262039278819618e-05,
"loss": 0.0,
"step": 455000
},
{
"epoch": 0.727140374808537,
"grad_norm": 0.00020070774189662188,
"learning_rate": 2.124493810339378e-05,
"loss": 0.0,
"step": 455500
},
{
"epoch": 0.7279385530465267,
"grad_norm": 0.0009217039914801717,
"learning_rate": 2.1227827103364987e-05,
"loss": 0.0,
"step": 456000
},
{
"epoch": 0.7287367312845162,
"grad_norm": 49.21879577636719,
"learning_rate": 2.121070630565245e-05,
"loss": 0.0,
"step": 456500
},
{
"epoch": 0.7295349095225059,
"grad_norm": 0.00036531256046146154,
"learning_rate": 2.119357573719078e-05,
"loss": 0.0,
"step": 457000
},
{
"epoch": 0.7303330877604954,
"grad_norm": 0.006041168235242367,
"learning_rate": 2.1176435424929984e-05,
"loss": 0.0,
"step": 457500
},
{
"epoch": 0.7311312659984851,
"grad_norm": 0.00016016997687984258,
"learning_rate": 2.115928539583538e-05,
"loss": 0.0,
"step": 458000
},
{
"epoch": 0.7319294442364747,
"grad_norm": 9.255438635591418e-05,
"learning_rate": 2.1142125676887573e-05,
"loss": 0.0,
"step": 458500
},
{
"epoch": 0.7327276224744643,
"grad_norm": 13.3391695022583,
"learning_rate": 2.1124956295082408e-05,
"loss": 0.0,
"step": 459000
},
{
"epoch": 0.7335258007124539,
"grad_norm": 0.002709623659029603,
"learning_rate": 2.110777727743095e-05,
"loss": 0.0,
"step": 459500
},
{
"epoch": 0.7343239789504435,
"grad_norm": 0.0001365782372886315,
"learning_rate": 2.1090588650959407e-05,
"loss": 0.0,
"step": 460000
},
{
"epoch": 0.7351221571884331,
"grad_norm": 0.0019237701781094074,
"learning_rate": 2.107339044270911e-05,
"loss": 0.0,
"step": 460500
},
{
"epoch": 0.7359203354264228,
"grad_norm": 0.00011767734395107254,
"learning_rate": 2.105618267973646e-05,
"loss": 0.0,
"step": 461000
},
{
"epoch": 0.7367185136644123,
"grad_norm": 0.00024400111578870565,
"learning_rate": 2.10389653891129e-05,
"loss": 0.0,
"step": 461500
},
{
"epoch": 0.737516691902402,
"grad_norm": 9.57796219154261e-05,
"learning_rate": 2.1021738597924846e-05,
"loss": 0.0,
"step": 462000
},
{
"epoch": 0.7383148701403915,
"grad_norm": 0.0005409715231508017,
"learning_rate": 2.1004502333273672e-05,
"loss": 0.0,
"step": 462500
},
{
"epoch": 0.7391130483783812,
"grad_norm": 0.3090362250804901,
"learning_rate": 2.0987256622275653e-05,
"loss": 0.0,
"step": 463000
},
{
"epoch": 0.7399112266163708,
"grad_norm": 0.00047693413216620684,
"learning_rate": 2.0970001492061924e-05,
"loss": 0.0,
"step": 463500
},
{
"epoch": 0.7407094048543604,
"grad_norm": 0.00021121487952768803,
"learning_rate": 2.095273696977844e-05,
"loss": 0.0,
"step": 464000
},
{
"epoch": 0.74150758309235,
"grad_norm": 0.00015116293798200786,
"learning_rate": 2.093546308258593e-05,
"loss": 0.0,
"step": 464500
},
{
"epoch": 0.7423057613303397,
"grad_norm": 0.0003296768991276622,
"learning_rate": 2.091817985765986e-05,
"loss": 0.0,
"step": 465000
},
{
"epoch": 0.7431039395683292,
"grad_norm": 0.000263060734141618,
"learning_rate": 2.090088732219038e-05,
"loss": 0.0,
"step": 465500
},
{
"epoch": 0.7439021178063189,
"grad_norm": 0.00016778981080278754,
"learning_rate": 2.0883585503382285e-05,
"loss": 0.0,
"step": 466000
},
{
"epoch": 0.7447002960443084,
"grad_norm": 0.00032316602300852537,
"learning_rate": 2.0866274428454993e-05,
"loss": 0.0,
"step": 466500
},
{
"epoch": 0.7454984742822981,
"grad_norm": 0.0310523621737957,
"learning_rate": 2.0848954124642457e-05,
"loss": 0.0,
"step": 467000
},
{
"epoch": 0.7462966525202877,
"grad_norm": 0.00014277735317591578,
"learning_rate": 2.0831624619193175e-05,
"loss": 0.0,
"step": 467500
},
{
"epoch": 0.7470948307582773,
"grad_norm": 0.0003888442297466099,
"learning_rate": 2.0814285939370103e-05,
"loss": 0.0,
"step": 468000
},
{
"epoch": 0.7478930089962669,
"grad_norm": 32.5892448425293,
"learning_rate": 2.0796938112450635e-05,
"loss": 0.0,
"step": 468500
},
{
"epoch": 0.7486911872342565,
"grad_norm": 0.0001689651544438675,
"learning_rate": 2.077958116572656e-05,
"loss": 0.0,
"step": 469000
},
{
"epoch": 0.7494893654722461,
"grad_norm": 0.0002664696075953543,
"learning_rate": 2.076221512650401e-05,
"loss": 0.0,
"step": 469500
},
{
"epoch": 0.7502875437102358,
"grad_norm": 0.00024409177422057837,
"learning_rate": 2.074484002210342e-05,
"loss": 0.0,
"step": 470000
},
{
"epoch": 0.7510857219482253,
"grad_norm": 0.00021594902500510216,
"learning_rate": 2.072745587985949e-05,
"loss": 0.0,
"step": 470500
},
{
"epoch": 0.751883900186215,
"grad_norm": 0.000743635231629014,
"learning_rate": 2.0710062727121142e-05,
"loss": 0.0,
"step": 471000
},
{
"epoch": 0.7526820784242046,
"grad_norm": 0.0020941346883773804,
"learning_rate": 2.069266059125146e-05,
"loss": 0.0,
"step": 471500
},
{
"epoch": 0.7534802566621942,
"grad_norm": 0.00034860908635891974,
"learning_rate": 2.0675249499627675e-05,
"loss": 0.0,
"step": 472000
},
{
"epoch": 0.7542784349001839,
"grad_norm": 0.00023366471577901393,
"learning_rate": 2.06578294796411e-05,
"loss": 0.0,
"step": 472500
},
{
"epoch": 0.7550766131381734,
"grad_norm": 0.0005297983298078179,
"learning_rate": 2.0640400558697097e-05,
"loss": 0.0,
"step": 473000
},
{
"epoch": 0.7558747913761631,
"grad_norm": 0.000132110042613931,
"learning_rate": 2.0622962764215024e-05,
"loss": 0.0,
"step": 473500
},
{
"epoch": 0.7566729696141526,
"grad_norm": 8.032079495023936e-05,
"learning_rate": 2.0605516123628208e-05,
"loss": 0.0,
"step": 474000
},
{
"epoch": 0.7574711478521423,
"grad_norm": 9.18265141081065e-05,
"learning_rate": 2.0588060664383896e-05,
"loss": 0.0,
"step": 474500
},
{
"epoch": 0.7582693260901319,
"grad_norm": 0.04310398921370506,
"learning_rate": 2.057059641394319e-05,
"loss": 0.0,
"step": 475000
},
{
"epoch": 0.7590675043281215,
"grad_norm": 0.0002098032709909603,
"learning_rate": 2.055312339978104e-05,
"loss": 0.0,
"step": 475500
},
{
"epoch": 0.7598656825661111,
"grad_norm": 0.00010694513184716925,
"learning_rate": 2.0535641649386177e-05,
"loss": 0.0,
"step": 476000
},
{
"epoch": 0.7606638608041008,
"grad_norm": 0.0002394245530012995,
"learning_rate": 2.0518151190261075e-05,
"loss": 0.0,
"step": 476500
},
{
"epoch": 0.7614620390420903,
"grad_norm": 0.00031559134367853403,
"learning_rate": 2.0500652049921917e-05,
"loss": 0.0,
"step": 477000
},
{
"epoch": 0.76226021728008,
"grad_norm": 0.0530126690864563,
"learning_rate": 2.048314425589853e-05,
"loss": 0.0,
"step": 477500
},
{
"epoch": 0.7630583955180695,
"grad_norm": 0.0002214965206803754,
"learning_rate": 2.046562783573436e-05,
"loss": 0.0,
"step": 478000
},
{
"epoch": 0.7638565737560592,
"grad_norm": 0.0003776904777623713,
"learning_rate": 2.0448102816986426e-05,
"loss": 0.0,
"step": 478500
},
{
"epoch": 0.7646547519940488,
"grad_norm": 0.0002720048651099205,
"learning_rate": 2.0430569227225278e-05,
"loss": 0.0,
"step": 479000
},
{
"epoch": 0.7654529302320384,
"grad_norm": 0.00013453431893140078,
"learning_rate": 2.0413027094034938e-05,
"loss": 0.0,
"step": 479500
},
{
"epoch": 0.766251108470028,
"grad_norm": 0.00026411519502289593,
"learning_rate": 2.0395476445012888e-05,
"loss": 0.0,
"step": 480000
},
{
"epoch": 0.7670492867080176,
"grad_norm": 0.0005124152521602809,
"learning_rate": 2.0377917307769987e-05,
"loss": 0.0,
"step": 480500
},
{
"epoch": 0.7678474649460072,
"grad_norm": 0.0005802405066788197,
"learning_rate": 2.0360349709930456e-05,
"loss": 0.0,
"step": 481000
},
{
"epoch": 0.7686456431839969,
"grad_norm": 0.0003442351007834077,
"learning_rate": 2.034277367913183e-05,
"loss": 0.0,
"step": 481500
},
{
"epoch": 0.7694438214219864,
"grad_norm": 0.01315320935100317,
"learning_rate": 2.0325189243024906e-05,
"loss": 0.0,
"step": 482000
},
{
"epoch": 0.7702419996599761,
"grad_norm": 0.0007831249386072159,
"learning_rate": 2.0307596429273707e-05,
"loss": 0.0,
"step": 482500
},
{
"epoch": 0.7710401778979656,
"grad_norm": 0.0002610905794426799,
"learning_rate": 2.0289995265555427e-05,
"loss": 0.0,
"step": 483000
},
{
"epoch": 0.7718383561359553,
"grad_norm": 0.0001466882531531155,
"learning_rate": 2.0272385779560415e-05,
"loss": 0.0,
"step": 483500
},
{
"epoch": 0.7726365343739449,
"grad_norm": 0.00014445210399571806,
"learning_rate": 2.0254767998992096e-05,
"loss": 0.0,
"step": 484000
},
{
"epoch": 0.7734347126119345,
"grad_norm": 0.0008763981168158352,
"learning_rate": 2.023714195156695e-05,
"loss": 0.0,
"step": 484500
},
{
"epoch": 0.7742328908499241,
"grad_norm": 0.00019388810324016958,
"learning_rate": 2.0219507665014458e-05,
"loss": 0.0,
"step": 485000
},
{
"epoch": 0.7750310690879137,
"grad_norm": 5447.041015625,
"learning_rate": 2.020186516707707e-05,
"loss": 0.0,
"step": 485500
},
{
"epoch": 0.7758292473259033,
"grad_norm": 0.00018875622481573373,
"learning_rate": 2.0184214485510155e-05,
"loss": 0.0,
"step": 486000
},
{
"epoch": 0.776627425563893,
"grad_norm": 0.0003076701541431248,
"learning_rate": 2.0166555648081948e-05,
"loss": 0.0,
"step": 486500
},
{
"epoch": 0.7774256038018826,
"grad_norm": 0.00028623873367905617,
"learning_rate": 2.0148888682573518e-05,
"loss": 0.0,
"step": 487000
},
{
"epoch": 0.7782237820398722,
"grad_norm": 0.00033153523690998554,
"learning_rate": 2.013121361677873e-05,
"loss": 0.0,
"step": 487500
},
{
"epoch": 0.7790219602778619,
"grad_norm": 0.18243126571178436,
"learning_rate": 2.011353047850418e-05,
"loss": 0.0,
"step": 488000
},
{
"epoch": 0.7798201385158514,
"grad_norm": 0.0001707931951386854,
"learning_rate": 2.009583929556917e-05,
"loss": 0.0,
"step": 488500
},
{
"epoch": 0.7806183167538411,
"grad_norm": 0.00017946858133655041,
"learning_rate": 2.0078140095805653e-05,
"loss": 0.0,
"step": 489000
},
{
"epoch": 0.7814164949918306,
"grad_norm": 0.00014652337995357811,
"learning_rate": 2.0060432907058204e-05,
"loss": 0.0,
"step": 489500
},
{
"epoch": 0.7822146732298203,
"grad_norm": 0.0001990313030546531,
"learning_rate": 2.0042717757183958e-05,
"loss": 0.0,
"step": 490000
},
{
"epoch": 0.7830128514678099,
"grad_norm": 0.0002826680720318109,
"learning_rate": 2.002499467405258e-05,
"loss": 0.0,
"step": 490500
},
{
"epoch": 0.7838110297057995,
"grad_norm": 0.00021341089450288564,
"learning_rate": 2.0007263685546207e-05,
"loss": 0.0,
"step": 491000
},
{
"epoch": 0.7846092079437891,
"grad_norm": 0.0002723548677749932,
"learning_rate": 1.998952481955942e-05,
"loss": 0.0,
"step": 491500
},
{
"epoch": 0.7854073861817787,
"grad_norm": 0.00017047197616193444,
"learning_rate": 1.9971778103999194e-05,
"loss": 0.0,
"step": 492000
},
{
"epoch": 0.7862055644197683,
"grad_norm": 8.363970118807629e-05,
"learning_rate": 1.9954023566784848e-05,
"loss": 0.0,
"step": 492500
},
{
"epoch": 0.787003742657758,
"grad_norm": 0.00022111626458354294,
"learning_rate": 1.9936261235848014e-05,
"loss": 0.0,
"step": 493000
},
{
"epoch": 0.7878019208957475,
"grad_norm": 0.00035100660170428455,
"learning_rate": 1.9918491139132573e-05,
"loss": 0.0,
"step": 493500
},
{
"epoch": 0.7886000991337372,
"grad_norm": 0.00016012179548852146,
"learning_rate": 1.990071330459463e-05,
"loss": 0.0,
"step": 494000
},
{
"epoch": 0.7893982773717267,
"grad_norm": 0.00026644032914191484,
"learning_rate": 1.9882927760202464e-05,
"loss": 0.0,
"step": 494500
},
{
"epoch": 0.7901964556097164,
"grad_norm": 0.0001790735695976764,
"learning_rate": 1.9865134533936485e-05,
"loss": 0.0,
"step": 495000
},
{
"epoch": 0.790994633847706,
"grad_norm": 0.0007223137654364109,
"learning_rate": 1.9847333653789186e-05,
"loss": 0.0,
"step": 495500
},
{
"epoch": 0.7917928120856956,
"grad_norm": 0.00024101993767544627,
"learning_rate": 1.9829525147765096e-05,
"loss": 0.0,
"step": 496000
},
{
"epoch": 0.7925909903236852,
"grad_norm": 0.00025261842529289424,
"learning_rate": 1.9811709043880752e-05,
"loss": 0.0,
"step": 496500
},
{
"epoch": 0.7933891685616749,
"grad_norm": 0.00024634183500893414,
"learning_rate": 1.9793885370164632e-05,
"loss": 0.0,
"step": 497000
},
{
"epoch": 0.7941873467996644,
"grad_norm": 0.0001890936982817948,
"learning_rate": 1.9776054154657133e-05,
"loss": 0.0,
"step": 497500
},
{
"epoch": 0.7949855250376541,
"grad_norm": 0.000774562475271523,
"learning_rate": 1.9758215425410505e-05,
"loss": 0.0,
"step": 498000
},
{
"epoch": 0.7957837032756436,
"grad_norm": 0.00032418197952210903,
"learning_rate": 1.974036921048884e-05,
"loss": 0.0,
"step": 498500
},
{
"epoch": 0.7965818815136333,
"grad_norm": 0.0001678819244261831,
"learning_rate": 1.9722515537967983e-05,
"loss": 0.0,
"step": 499000
},
{
"epoch": 0.7973800597516228,
"grad_norm": 0.00012998198508284986,
"learning_rate": 1.970465443593552e-05,
"loss": 0.0,
"step": 499500
},
{
"epoch": 0.7981782379896125,
"grad_norm": 0.00015611288836225867,
"learning_rate": 1.9686785932490737e-05,
"loss": 0.0,
"step": 500000
},
{
"epoch": 0.7989764162276021,
"grad_norm": 9.851302456809208e-05,
"learning_rate": 1.9668910055744534e-05,
"loss": 0.0,
"step": 500500
},
{
"epoch": 0.7997745944655917,
"grad_norm": 1.1728882789611816,
"learning_rate": 1.9651026833819453e-05,
"loss": 0.0,
"step": 501000
},
{
"epoch": 0.8005727727035813,
"grad_norm": 0.00021649766131304204,
"learning_rate": 1.963313629484955e-05,
"loss": 0.0,
"step": 501500
},
{
"epoch": 0.801370950941571,
"grad_norm": 0.0012857463443651795,
"learning_rate": 1.9615238466980426e-05,
"loss": 0.0,
"step": 502000
},
{
"epoch": 0.8021691291795606,
"grad_norm": 0.001131516881287098,
"learning_rate": 1.9597333378369123e-05,
"loss": 0.0,
"step": 502500
},
{
"epoch": 0.8029673074175502,
"grad_norm": 0.0011782856890931726,
"learning_rate": 1.957942105718412e-05,
"loss": 0.0,
"step": 503000
},
{
"epoch": 0.8037654856555398,
"grad_norm": 0.0001663925067987293,
"learning_rate": 1.9561501531605272e-05,
"loss": 0.0,
"step": 503500
},
{
"epoch": 0.8045636638935294,
"grad_norm": 0.00026339280884712934,
"learning_rate": 1.9543574829823768e-05,
"loss": 0.0,
"step": 504000
},
{
"epoch": 0.8053618421315191,
"grad_norm": 0.0003007679770234972,
"learning_rate": 1.952564098004208e-05,
"loss": 0.0,
"step": 504500
},
{
"epoch": 0.8061600203695086,
"grad_norm": 0.00020266381034161896,
"learning_rate": 1.9507700010473938e-05,
"loss": 0.0,
"step": 505000
},
{
"epoch": 0.8069581986074983,
"grad_norm": 0.0002320464700460434,
"learning_rate": 1.9489751949344265e-05,
"loss": 0.0,
"step": 505500
},
{
"epoch": 0.8077563768454878,
"grad_norm": 0.00040771221392787993,
"learning_rate": 1.9471796824889136e-05,
"loss": 0.0,
"step": 506000
},
{
"epoch": 0.8085545550834775,
"grad_norm": 0.00015795578656252474,
"learning_rate": 1.9453834665355747e-05,
"loss": 0.0,
"step": 506500
},
{
"epoch": 0.8093527333214671,
"grad_norm": 0.00014850639854557812,
"learning_rate": 1.9435865499002358e-05,
"loss": 0.0,
"step": 507000
},
{
"epoch": 0.8101509115594567,
"grad_norm": 0.023478705435991287,
"learning_rate": 1.941788935409825e-05,
"loss": 0.0,
"step": 507500
},
{
"epoch": 0.8109490897974463,
"grad_norm": 9.967546793632209e-05,
"learning_rate": 1.9399906258923688e-05,
"loss": 0.0,
"step": 508000
},
{
"epoch": 0.811747268035436,
"grad_norm": 0.013064282946288586,
"learning_rate": 1.938191624176987e-05,
"loss": 0.0,
"step": 508500
},
{
"epoch": 0.8125454462734255,
"grad_norm": 0.0002515662636142224,
"learning_rate": 1.9363919330938877e-05,
"loss": 0.0,
"step": 509000
},
{
"epoch": 0.8133436245114152,
"grad_norm": 9.188640251522884e-05,
"learning_rate": 1.9345915554743648e-05,
"loss": 0.0,
"step": 509500
},
{
"epoch": 0.8141418027494047,
"grad_norm": 0.0002370925503782928,
"learning_rate": 1.9327904941507905e-05,
"loss": 0.0,
"step": 510000
},
{
"epoch": 0.8149399809873944,
"grad_norm": 0.0002923431165982038,
"learning_rate": 1.9309887519566138e-05,
"loss": 0.0,
"step": 510500
},
{
"epoch": 0.815738159225384,
"grad_norm": 0.0011024456471204758,
"learning_rate": 1.9291863317263552e-05,
"loss": 0.0,
"step": 511000
},
{
"epoch": 0.8165363374633736,
"grad_norm": 0.000260900822468102,
"learning_rate": 1.9273832362956013e-05,
"loss": 0.0,
"step": 511500
},
{
"epoch": 0.8173345157013632,
"grad_norm": 0.0003531461698003113,
"learning_rate": 1.9255794685010005e-05,
"loss": 0.0,
"step": 512000
},
{
"epoch": 0.8181326939393528,
"grad_norm": 0.00018732767784968019,
"learning_rate": 1.9237750311802607e-05,
"loss": 0.0,
"step": 512500
},
{
"epoch": 0.8189308721773424,
"grad_norm": 0.000320764520438388,
"learning_rate": 1.9219699271721395e-05,
"loss": 0.0,
"step": 513000
},
{
"epoch": 0.8197290504153321,
"grad_norm": 0.0001915542088681832,
"learning_rate": 1.920164159316448e-05,
"loss": 0.0,
"step": 513500
},
{
"epoch": 0.8205272286533216,
"grad_norm": 0.0005298721953295171,
"learning_rate": 1.9183577304540377e-05,
"loss": 0.0,
"step": 514000
},
{
"epoch": 0.8213254068913113,
"grad_norm": 0.000173246517078951,
"learning_rate": 1.916550643426803e-05,
"loss": 0.0,
"step": 514500
},
{
"epoch": 0.8221235851293008,
"grad_norm": 0.00034074197174049914,
"learning_rate": 1.9147429010776717e-05,
"loss": 0.0,
"step": 515000
},
{
"epoch": 0.8229217633672905,
"grad_norm": 0.0003810925700236112,
"learning_rate": 1.9129345062506034e-05,
"loss": 0.0,
"step": 515500
},
{
"epoch": 0.8237199416052801,
"grad_norm": 0.0002107440959662199,
"learning_rate": 1.911125461790584e-05,
"loss": 0.0,
"step": 516000
},
{
"epoch": 0.8245181198432697,
"grad_norm": 0.00023204906028695405,
"learning_rate": 1.9093157705436212e-05,
"loss": 0.0,
"step": 516500
},
{
"epoch": 0.8253162980812593,
"grad_norm": 0.00024208931426983327,
"learning_rate": 1.9075054353567416e-05,
"loss": 0.0,
"step": 517000
},
{
"epoch": 0.826114476319249,
"grad_norm": 0.00014883586845826358,
"learning_rate": 1.905694459077982e-05,
"loss": 0.0,
"step": 517500
},
{
"epoch": 0.8269126545572386,
"grad_norm": 0.017815813422203064,
"learning_rate": 1.9038828445563912e-05,
"loss": 0.0,
"step": 518000
},
{
"epoch": 0.8277108327952282,
"grad_norm": 0.0003795297525357455,
"learning_rate": 1.902070594642019e-05,
"loss": 0.0,
"step": 518500
},
{
"epoch": 0.8285090110332178,
"grad_norm": 0.00012054865510435775,
"learning_rate": 1.9002577121859175e-05,
"loss": 0.0,
"step": 519000
},
{
"epoch": 0.8293071892712074,
"grad_norm": 5.091236591339111,
"learning_rate": 1.8984442000401316e-05,
"loss": 0.0,
"step": 519500
},
{
"epoch": 0.8301053675091971,
"grad_norm": 0.00017892369942273945,
"learning_rate": 1.8966300610576983e-05,
"loss": 0.0,
"step": 520000
},
{
"epoch": 0.8309035457471866,
"grad_norm": 0.016385966911911964,
"learning_rate": 1.8948152980926404e-05,
"loss": 0.0,
"step": 520500
},
{
"epoch": 0.8317017239851763,
"grad_norm": 0.004711247514933348,
"learning_rate": 1.892999913999962e-05,
"loss": 0.0,
"step": 521000
},
{
"epoch": 0.8324999022231658,
"grad_norm": 0.0005881072720512748,
"learning_rate": 1.8911839116356453e-05,
"loss": 0.0,
"step": 521500
},
{
"epoch": 0.8332980804611555,
"grad_norm": 0.013069476932287216,
"learning_rate": 1.8893672938566436e-05,
"loss": 0.0,
"step": 522000
},
{
"epoch": 0.834096258699145,
"grad_norm": 0.00010810209641931579,
"learning_rate": 1.8875500635208797e-05,
"loss": 0.0,
"step": 522500
},
{
"epoch": 0.8348944369371347,
"grad_norm": 7.931066647870466e-05,
"learning_rate": 1.8857322234872397e-05,
"loss": 0.0,
"step": 523000
},
{
"epoch": 0.8356926151751243,
"grad_norm": 0.00013571855379268527,
"learning_rate": 1.883913776615569e-05,
"loss": 0.0,
"step": 523500
},
{
"epoch": 0.8364907934131139,
"grad_norm": 8.749699190957472e-05,
"learning_rate": 1.8820947257666667e-05,
"loss": 0.0,
"step": 524000
},
{
"epoch": 0.8372889716511035,
"grad_norm": 100.56954193115234,
"learning_rate": 1.8802750738022838e-05,
"loss": 0.0,
"step": 524500
},
{
"epoch": 0.8380871498890932,
"grad_norm": 0.0004932993906550109,
"learning_rate": 1.8784548235851168e-05,
"loss": 0.0,
"step": 525000
},
{
"epoch": 0.8388853281270827,
"grad_norm": 9.414002124685794e-05,
"learning_rate": 1.8766339779788005e-05,
"loss": 0.0,
"step": 525500
},
{
"epoch": 0.8396835063650724,
"grad_norm": 217.5888671875,
"learning_rate": 1.87481253984791e-05,
"loss": 0.0,
"step": 526000
},
{
"epoch": 0.8404816846030619,
"grad_norm": 0.0001938005443662405,
"learning_rate": 1.8729905120579513e-05,
"loss": 0.0,
"step": 526500
},
{
"epoch": 0.8412798628410516,
"grad_norm": 7.117674977052957e-05,
"learning_rate": 1.871167897475357e-05,
"loss": 0.0,
"step": 527000
},
{
"epoch": 0.8420780410790412,
"grad_norm": 0.0006304022972472012,
"learning_rate": 1.8693446989674846e-05,
"loss": 0.0,
"step": 527500
},
{
"epoch": 0.8428762193170308,
"grad_norm": 0.00013564492110162973,
"learning_rate": 1.8675209194026088e-05,
"loss": 0.0,
"step": 528000
},
{
"epoch": 0.8436743975550204,
"grad_norm": 372.4085388183594,
"learning_rate": 1.8656965616499194e-05,
"loss": 0.0,
"step": 528500
},
{
"epoch": 0.84447257579301,
"grad_norm": 0.031750548630952835,
"learning_rate": 1.8638716285795146e-05,
"loss": 0.0,
"step": 529000
},
{
"epoch": 0.8452707540309996,
"grad_norm": 0.00014481242396868765,
"learning_rate": 1.8620461230623994e-05,
"loss": 0.0,
"step": 529500
},
{
"epoch": 0.8460689322689893,
"grad_norm": 0.0007552816532552242,
"learning_rate": 1.8602200479704776e-05,
"loss": 0.0,
"step": 530000
},
{
"epoch": 0.8468671105069788,
"grad_norm": 0.00011971026106039062,
"learning_rate": 1.858393406176551e-05,
"loss": 0.0,
"step": 530500
},
{
"epoch": 0.8476652887449685,
"grad_norm": 9.909499931382015e-05,
"learning_rate": 1.8565662005543106e-05,
"loss": 0.0,
"step": 531000
},
{
"epoch": 0.848463466982958,
"grad_norm": 0.0002251157711725682,
"learning_rate": 1.8547384339783368e-05,
"loss": 0.0,
"step": 531500
},
{
"epoch": 0.8492616452209477,
"grad_norm": 0.00016564581892453134,
"learning_rate": 1.852910109324091e-05,
"loss": 0.0,
"step": 532000
},
{
"epoch": 0.8500598234589373,
"grad_norm": 0.00014557143731508404,
"learning_rate": 1.8510812294679122e-05,
"loss": 0.0,
"step": 532500
},
{
"epoch": 0.8508580016969269,
"grad_norm": 0.00032126580481417477,
"learning_rate": 1.849251797287015e-05,
"loss": 0.0,
"step": 533000
},
{
"epoch": 0.8516561799349166,
"grad_norm": 0.0002472183550707996,
"learning_rate": 1.8474218156594805e-05,
"loss": 0.0,
"step": 533500
},
{
"epoch": 0.8524543581729062,
"grad_norm": 0.0003146003873553127,
"learning_rate": 1.8455912874642562e-05,
"loss": 0.0,
"step": 534000
},
{
"epoch": 0.8532525364108958,
"grad_norm": 0.00025272014318034053,
"learning_rate": 1.8437602155811473e-05,
"loss": 0.0,
"step": 534500
},
{
"epoch": 0.8540507146488854,
"grad_norm": 0.00020710949320346117,
"learning_rate": 1.8419286028908164e-05,
"loss": 0.0,
"step": 535000
},
{
"epoch": 0.854848892886875,
"grad_norm": 0.00043446040945127606,
"learning_rate": 1.840096452274776e-05,
"loss": 0.0,
"step": 535500
},
{
"epoch": 0.8556470711248646,
"grad_norm": 0.0002756445901468396,
"learning_rate": 1.8382637666153842e-05,
"loss": 0.0,
"step": 536000
},
{
"epoch": 0.8564452493628543,
"grad_norm": 0.00025066762464120984,
"learning_rate": 1.8364305487958422e-05,
"loss": 0.0,
"step": 536500
},
{
"epoch": 0.8572434276008438,
"grad_norm": 0.00021802319679409266,
"learning_rate": 1.8345968017001875e-05,
"loss": 0.0,
"step": 537000
},
{
"epoch": 0.8580416058388335,
"grad_norm": 0.0005224825581535697,
"learning_rate": 1.8327625282132908e-05,
"loss": 0.0,
"step": 537500
},
{
"epoch": 0.858839784076823,
"grad_norm": 0.00023706798674538732,
"learning_rate": 1.8309277312208506e-05,
"loss": 0.0,
"step": 538000
},
{
"epoch": 0.8596379623148127,
"grad_norm": 0.003222405444830656,
"learning_rate": 1.8290924136093882e-05,
"loss": 0.0,
"step": 538500
},
{
"epoch": 0.8604361405528023,
"grad_norm": 0.002675387542694807,
"learning_rate": 1.8272565782662458e-05,
"loss": 0.0,
"step": 539000
},
{
"epoch": 0.8612343187907919,
"grad_norm": 0.00026413999148644507,
"learning_rate": 1.8254202280795784e-05,
"loss": 0.0,
"step": 539500
},
{
"epoch": 0.8620324970287815,
"grad_norm": 0.0013028520625084639,
"learning_rate": 1.823583365938352e-05,
"loss": 0.0,
"step": 540000
},
{
"epoch": 0.8628306752667712,
"grad_norm": 0.00041675748070701957,
"learning_rate": 1.8217459947323374e-05,
"loss": 0.0,
"step": 540500
},
{
"epoch": 0.8636288535047607,
"grad_norm": 0.0011459665838629007,
"learning_rate": 1.819908117352107e-05,
"loss": 0.0,
"step": 541000
},
{
"epoch": 0.8644270317427504,
"grad_norm": 0.0003427125629968941,
"learning_rate": 1.818069736689028e-05,
"loss": 0.0,
"step": 541500
},
{
"epoch": 0.8652252099807399,
"grad_norm": 0.00021466145699378103,
"learning_rate": 1.816230855635261e-05,
"loss": 0.0,
"step": 542000
},
{
"epoch": 0.8660233882187296,
"grad_norm": 0.00012651206634473056,
"learning_rate": 1.8143914770837535e-05,
"loss": 0.0,
"step": 542500
},
{
"epoch": 0.8668215664567192,
"grad_norm": 0.0002110886125592515,
"learning_rate": 1.8125516039282347e-05,
"loss": 0.0,
"step": 543000
},
{
"epoch": 0.8676197446947088,
"grad_norm": 0.0003328848397359252,
"learning_rate": 1.8107112390632135e-05,
"loss": 0.0,
"step": 543500
},
{
"epoch": 0.8684179229326984,
"grad_norm": 0.005305567290633917,
"learning_rate": 1.8088703853839707e-05,
"loss": 0.0,
"step": 544000
},
{
"epoch": 0.869216101170688,
"grad_norm": 0.0003353380016051233,
"learning_rate": 1.8070290457865575e-05,
"loss": 0.0,
"step": 544500
},
{
"epoch": 0.8700142794086776,
"grad_norm": 0.11375279724597931,
"learning_rate": 1.8051872231677876e-05,
"loss": 0.0,
"step": 545000
},
{
"epoch": 0.8708124576466673,
"grad_norm": 0.00021715887123718858,
"learning_rate": 1.8033449204252376e-05,
"loss": 0.0,
"step": 545500
},
{
"epoch": 0.8716106358846568,
"grad_norm": 0.0002547954791225493,
"learning_rate": 1.801502140457236e-05,
"loss": 0.0,
"step": 546000
},
{
"epoch": 0.8724088141226465,
"grad_norm": 0.0003588471154216677,
"learning_rate": 1.7996588861628653e-05,
"loss": 0.0,
"step": 546500
},
{
"epoch": 0.873206992360636,
"grad_norm": 0.0005377253983169794,
"learning_rate": 1.797815160441952e-05,
"loss": 0.0,
"step": 547000
},
{
"epoch": 0.8740051705986257,
"grad_norm": 4.066957473754883,
"learning_rate": 1.7959709661950656e-05,
"loss": 0.0,
"step": 547500
},
{
"epoch": 0.8748033488366153,
"grad_norm": 0.0021291342563927174,
"learning_rate": 1.7941263063235112e-05,
"loss": 0.0,
"step": 548000
},
{
"epoch": 0.8756015270746049,
"grad_norm": 0.005908517632633448,
"learning_rate": 1.792281183729328e-05,
"loss": 0.0,
"step": 548500
},
{
"epoch": 0.8763997053125945,
"grad_norm": 0.00016735069220885634,
"learning_rate": 1.790435601315282e-05,
"loss": 0.0,
"step": 549000
},
{
"epoch": 0.8771978835505841,
"grad_norm": 0.11726677417755127,
"learning_rate": 1.7885895619848632e-05,
"loss": 0.0,
"step": 549500
},
{
"epoch": 0.8779960617885738,
"grad_norm": 0.0001299067516811192,
"learning_rate": 1.7867430686422805e-05,
"loss": 0.0,
"step": 550000
},
{
"epoch": 0.8787942400265634,
"grad_norm": 0.0008645313209854066,
"learning_rate": 1.7848961241924568e-05,
"loss": 0.0,
"step": 550500
},
{
"epoch": 0.879592418264553,
"grad_norm": 0.001213490148074925,
"learning_rate": 1.7830487315410244e-05,
"loss": 0.0,
"step": 551000
},
{
"epoch": 0.8803905965025426,
"grad_norm": 0.0003075381100643426,
"learning_rate": 1.7812008935943214e-05,
"loss": 0.0,
"step": 551500
},
{
"epoch": 0.8811887747405323,
"grad_norm": 0.00016041690832935274,
"learning_rate": 1.779352613259386e-05,
"loss": 0.0,
"step": 552000
},
{
"epoch": 0.8819869529785218,
"grad_norm": 0.00022118906781543046,
"learning_rate": 1.777503893443952e-05,
"loss": 0.0,
"step": 552500
},
{
"epoch": 0.8827851312165115,
"grad_norm": 0.0002528753539081663,
"learning_rate": 1.7756547370564453e-05,
"loss": 0.0,
"step": 553000
},
{
"epoch": 0.883583309454501,
"grad_norm": 0.00020087572920601815,
"learning_rate": 1.7738051470059794e-05,
"loss": 0.0,
"step": 553500
},
{
"epoch": 0.8843814876924907,
"grad_norm": 0.0002061546838376671,
"learning_rate": 1.7719551262023474e-05,
"loss": 0.0,
"step": 554000
},
{
"epoch": 0.8851796659304803,
"grad_norm": 0.004588930867612362,
"learning_rate": 1.7701046775560224e-05,
"loss": 0.0,
"step": 554500
},
{
"epoch": 0.8859778441684699,
"grad_norm": 0.00019890641851816326,
"learning_rate": 1.76825380397815e-05,
"loss": 0.0,
"step": 555000
},
{
"epoch": 0.8867760224064595,
"grad_norm": 0.0015211553545668721,
"learning_rate": 1.766402508380544e-05,
"loss": 0.0,
"step": 555500
},
{
"epoch": 0.8875742006444491,
"grad_norm": 0.00012981586041860282,
"learning_rate": 1.7645507936756825e-05,
"loss": 0.0,
"step": 556000
},
{
"epoch": 0.8883723788824387,
"grad_norm": 0.0001142864057328552,
"learning_rate": 1.7626986627767025e-05,
"loss": 0.0,
"step": 556500
},
{
"epoch": 0.8891705571204284,
"grad_norm": 0.00013470168050844222,
"learning_rate": 1.760846118597396e-05,
"loss": 0.0,
"step": 557000
},
{
"epoch": 0.8899687353584179,
"grad_norm": 0.00016007563681341708,
"learning_rate": 1.7589931640522053e-05,
"loss": 0.0,
"step": 557500
},
{
"epoch": 0.8907669135964076,
"grad_norm": 0.00014388897398021072,
"learning_rate": 1.757139802056218e-05,
"loss": 0.0,
"step": 558000
},
{
"epoch": 0.8915650918343971,
"grad_norm": 0.00018686757539398968,
"learning_rate": 1.7552860355251632e-05,
"loss": 0.0,
"step": 558500
},
{
"epoch": 0.8923632700723868,
"grad_norm": 0.002567690797150135,
"learning_rate": 1.7534318673754057e-05,
"loss": 0.0,
"step": 559000
},
{
"epoch": 0.8931614483103764,
"grad_norm": 0.00014588158228434622,
"learning_rate": 1.751577300523943e-05,
"loss": 0.0,
"step": 559500
},
{
"epoch": 0.893959626548366,
"grad_norm": 0.00012244051322340965,
"learning_rate": 1.749722337888399e-05,
"loss": 0.0,
"step": 560000
},
{
"epoch": 0.8947578047863556,
"grad_norm": 0.0001751197996782139,
"learning_rate": 1.7478669823870202e-05,
"loss": 0.0,
"step": 560500
},
{
"epoch": 0.8955559830243452,
"grad_norm": 0.0001905184908537194,
"learning_rate": 1.7460112369386723e-05,
"loss": 0.0,
"step": 561000
},
{
"epoch": 0.8963541612623348,
"grad_norm": 0.00021194595319684595,
"learning_rate": 1.7441551044628338e-05,
"loss": 0.0,
"step": 561500
},
{
"epoch": 0.8971523395003245,
"grad_norm": 0.00022341775184031576,
"learning_rate": 1.742298587879592e-05,
"loss": 0.0,
"step": 562000
},
{
"epoch": 0.897950517738314,
"grad_norm": 0.00014410440053325146,
"learning_rate": 1.7404416901096373e-05,
"loss": 0.0,
"step": 562500
},
{
"epoch": 0.8987486959763037,
"grad_norm": 8.38216656120494e-05,
"learning_rate": 1.738584414074263e-05,
"loss": 0.0,
"step": 563000
},
{
"epoch": 0.8995468742142932,
"grad_norm": 0.0003483085019979626,
"learning_rate": 1.736726762695354e-05,
"loss": 0.0,
"step": 563500
},
{
"epoch": 0.9003450524522829,
"grad_norm": 0.006742103956639767,
"learning_rate": 1.7348687388953877e-05,
"loss": 0.0,
"step": 564000
},
{
"epoch": 0.9011432306902725,
"grad_norm": 0.00015459113637916744,
"learning_rate": 1.7330103455974265e-05,
"loss": 0.0,
"step": 564500
},
{
"epoch": 0.9019414089282621,
"grad_norm": 0.00011757721949834377,
"learning_rate": 1.7311515857251152e-05,
"loss": 0.0,
"step": 565000
},
{
"epoch": 0.9027395871662518,
"grad_norm": 0.0001631448103580624,
"learning_rate": 1.7292924622026736e-05,
"loss": 0.0,
"step": 565500
},
{
"epoch": 0.9035377654042414,
"grad_norm": 0.000231702157179825,
"learning_rate": 1.727432977954896e-05,
"loss": 0.0,
"step": 566000
},
{
"epoch": 0.904335943642231,
"grad_norm": 0.00016405931091867387,
"learning_rate": 1.725573135907141e-05,
"loss": 0.0,
"step": 566500
},
{
"epoch": 0.9051341218802206,
"grad_norm": 0.0002725492522586137,
"learning_rate": 1.7237129389853332e-05,
"loss": 0.0,
"step": 567000
},
{
"epoch": 0.9059323001182102,
"grad_norm": 0.00022207711299415678,
"learning_rate": 1.7218523901159536e-05,
"loss": 0.0,
"step": 567500
},
{
"epoch": 0.9067304783561998,
"grad_norm": 0.010523856617510319,
"learning_rate": 1.7199914922260375e-05,
"loss": 0.0,
"step": 568000
},
{
"epoch": 0.9075286565941895,
"grad_norm": 0.00027734291506931186,
"learning_rate": 1.7181302482431694e-05,
"loss": 0.0,
"step": 568500
},
{
"epoch": 0.908326834832179,
"grad_norm": 0.00016665668226778507,
"learning_rate": 1.7162686610954778e-05,
"loss": 0.0,
"step": 569000
},
{
"epoch": 0.9091250130701687,
"grad_norm": 0.00012514390982687473,
"learning_rate": 1.714406733711632e-05,
"loss": 0.0,
"step": 569500
},
{
"epoch": 0.9099231913081582,
"grad_norm": 0.0009053830290213227,
"learning_rate": 1.7125444690208352e-05,
"loss": 0.0001,
"step": 570000
},
{
"epoch": 0.9107213695461479,
"grad_norm": 0.000333428499288857,
"learning_rate": 1.710681869952822e-05,
"loss": 0.0,
"step": 570500
},
{
"epoch": 0.9115195477841375,
"grad_norm": 0.0003971235710196197,
"learning_rate": 1.7088189394378537e-05,
"loss": 0.0,
"step": 571000
},
{
"epoch": 0.9123177260221271,
"grad_norm": 0.0004157690273132175,
"learning_rate": 1.706955680406712e-05,
"loss": 0.0,
"step": 571500
},
{
"epoch": 0.9131159042601167,
"grad_norm": 0.00027369658346287906,
"learning_rate": 1.7050920957906956e-05,
"loss": 0.0,
"step": 572000
},
{
"epoch": 0.9139140824981064,
"grad_norm": 4293.71044921875,
"learning_rate": 1.703228188521616e-05,
"loss": 0.0,
"step": 572500
},
{
"epoch": 0.9147122607360959,
"grad_norm": 0.0003048023791052401,
"learning_rate": 1.7013639615317912e-05,
"loss": 0.0,
"step": 573000
},
{
"epoch": 0.9155104389740856,
"grad_norm": 312.3310546875,
"learning_rate": 1.699499417754044e-05,
"loss": 0.0,
"step": 573500
},
{
"epoch": 0.9163086172120751,
"grad_norm": 0.0006332839257083833,
"learning_rate": 1.6976345601216934e-05,
"loss": 0.0,
"step": 574000
},
{
"epoch": 0.9171067954500648,
"grad_norm": 0.0004323949106037617,
"learning_rate": 1.695769391568554e-05,
"loss": 0.0,
"step": 574500
},
{
"epoch": 0.9179049736880543,
"grad_norm": 0.0002921383420471102,
"learning_rate": 1.6939039150289284e-05,
"loss": 0.0,
"step": 575000
},
{
"epoch": 0.918703151926044,
"grad_norm": 0.00022676597291138023,
"learning_rate": 1.692038133437604e-05,
"loss": 0.0,
"step": 575500
},
{
"epoch": 0.9195013301640336,
"grad_norm": 0.00021034496603533626,
"learning_rate": 1.690172049729849e-05,
"loss": 0.0,
"step": 576000
},
{
"epoch": 0.9202995084020232,
"grad_norm": 0.00011297802848275751,
"learning_rate": 1.6883056668414055e-05,
"loss": 0.0,
"step": 576500
},
{
"epoch": 0.9210976866400128,
"grad_norm": 0.0002499468100722879,
"learning_rate": 1.6864389877084864e-05,
"loss": 0.0,
"step": 577000
},
{
"epoch": 0.9218958648780025,
"grad_norm": 0.0009551959810778499,
"learning_rate": 1.6845720152677714e-05,
"loss": 0.0,
"step": 577500
},
{
"epoch": 0.922694043115992,
"grad_norm": 0.00026030378649011254,
"learning_rate": 1.6827047524564023e-05,
"loss": 0.0,
"step": 578000
},
{
"epoch": 0.9234922213539817,
"grad_norm": 0.0013032422866672277,
"learning_rate": 1.6808372022119757e-05,
"loss": 0.0,
"step": 578500
},
{
"epoch": 0.9242903995919712,
"grad_norm": 0.04895612224936485,
"learning_rate": 1.6789693674725426e-05,
"loss": 0.0,
"step": 579000
},
{
"epoch": 0.9250885778299609,
"grad_norm": 0.0001584753772476688,
"learning_rate": 1.677101251176599e-05,
"loss": 0.0,
"step": 579500
},
{
"epoch": 0.9258867560679505,
"grad_norm": 0.00018785694555845112,
"learning_rate": 1.6752328562630863e-05,
"loss": 0.0,
"step": 580000
},
{
"epoch": 0.9266849343059401,
"grad_norm": 0.00013747978664468974,
"learning_rate": 1.673364185671383e-05,
"loss": 0.0,
"step": 580500
},
{
"epoch": 0.9274831125439298,
"grad_norm": 0.0001757456484483555,
"learning_rate": 1.671495242341301e-05,
"loss": 0.0,
"step": 581000
},
{
"epoch": 0.9282812907819193,
"grad_norm": 0.00012764699931722134,
"learning_rate": 1.6696260292130827e-05,
"loss": 0.0,
"step": 581500
},
{
"epoch": 0.929079469019909,
"grad_norm": 0.00016263512952718884,
"learning_rate": 1.6677565492273935e-05,
"loss": 0.0,
"step": 582000
},
{
"epoch": 0.9298776472578986,
"grad_norm": 0.00020701033645309508,
"learning_rate": 1.66588680532532e-05,
"loss": 0.0,
"step": 582500
},
{
"epoch": 0.9306758254958882,
"grad_norm": 0.0002117520198225975,
"learning_rate": 1.6640168004483616e-05,
"loss": 0.0,
"step": 583000
},
{
"epoch": 0.9314740037338778,
"grad_norm": 0.00017498256056569517,
"learning_rate": 1.662146537538431e-05,
"loss": 0.0,
"step": 583500
},
{
"epoch": 0.9322721819718675,
"grad_norm": 0.00016180331294890493,
"learning_rate": 1.660276019537845e-05,
"loss": 0.0,
"step": 584000
},
{
"epoch": 0.933070360209857,
"grad_norm": 0.00016664137365296483,
"learning_rate": 1.658405249389323e-05,
"loss": 0.0,
"step": 584500
},
{
"epoch": 0.9338685384478467,
"grad_norm": 0.0004110218142159283,
"learning_rate": 1.65653423003598e-05,
"loss": 0.0,
"step": 585000
},
{
"epoch": 0.9346667166858362,
"grad_norm": 0.00035451291478239,
"learning_rate": 1.6546629644213244e-05,
"loss": 0.0,
"step": 585500
},
{
"epoch": 0.9354648949238259,
"grad_norm": 0.00023166697064880282,
"learning_rate": 1.6527914554892503e-05,
"loss": 0.0,
"step": 586000
},
{
"epoch": 0.9362630731618155,
"grad_norm": 0.00042415011557750404,
"learning_rate": 1.650919706184035e-05,
"loss": 0.0,
"step": 586500
},
{
"epoch": 0.9370612513998051,
"grad_norm": 0.000651057343930006,
"learning_rate": 1.6490477194503354e-05,
"loss": 0.0,
"step": 587000
},
{
"epoch": 0.9378594296377947,
"grad_norm": 0.0006024042959325016,
"learning_rate": 1.6471754982331805e-05,
"loss": 0.0,
"step": 587500
},
{
"epoch": 0.9386576078757843,
"grad_norm": 0.00014395530160982162,
"learning_rate": 1.645303045477969e-05,
"loss": 0.0,
"step": 588000
},
{
"epoch": 0.9394557861137739,
"grad_norm": 0.00016363435133825988,
"learning_rate": 1.6434303641304624e-05,
"loss": 0.0,
"step": 588500
},
{
"epoch": 0.9402539643517636,
"grad_norm": 0.00018911808729171753,
"learning_rate": 1.6415574571367838e-05,
"loss": 0.0,
"step": 589000
},
{
"epoch": 0.9410521425897531,
"grad_norm": 0.00025503954384475946,
"learning_rate": 1.6396843274434104e-05,
"loss": 0.0,
"step": 589500
},
{
"epoch": 0.9418503208277428,
"grad_norm": 0.000276244361884892,
"learning_rate": 1.6378109779971688e-05,
"loss": 0.0,
"step": 590000
},
{
"epoch": 0.9426484990657323,
"grad_norm": 0.00021976447897031903,
"learning_rate": 1.6359374117452336e-05,
"loss": 0.0,
"step": 590500
},
{
"epoch": 0.943446677303722,
"grad_norm": 0.00018629759142640978,
"learning_rate": 1.634063631635118e-05,
"loss": 0.0,
"step": 591000
},
{
"epoch": 0.9442448555417116,
"grad_norm": 0.00018371363694313914,
"learning_rate": 1.6321896406146738e-05,
"loss": 0.0,
"step": 591500
},
{
"epoch": 0.9450430337797012,
"grad_norm": 0.0001642345596337691,
"learning_rate": 1.6303154416320825e-05,
"loss": 0.0,
"step": 592000
},
{
"epoch": 0.9458412120176908,
"grad_norm": 0.0002542451547924429,
"learning_rate": 1.6284410376358545e-05,
"loss": 0.0,
"step": 592500
},
{
"epoch": 0.9466393902556804,
"grad_norm": 0.0003169816918671131,
"learning_rate": 1.6265664315748215e-05,
"loss": 0.0,
"step": 593000
},
{
"epoch": 0.94743756849367,
"grad_norm": 147.49600219726562,
"learning_rate": 1.6246916263981344e-05,
"loss": 0.0,
"step": 593500
},
{
"epoch": 0.9482357467316597,
"grad_norm": 0.0002796686312649399,
"learning_rate": 1.6228166250552565e-05,
"loss": 0.0,
"step": 594000
},
{
"epoch": 0.9490339249696492,
"grad_norm": 0.00013148550351615995,
"learning_rate": 1.620941430495959e-05,
"loss": 0.0,
"step": 594500
},
{
"epoch": 0.9498321032076389,
"grad_norm": 0.0008374619064852595,
"learning_rate": 1.6190660456703192e-05,
"loss": 0.0,
"step": 595000
},
{
"epoch": 0.9506302814456284,
"grad_norm": 0.00010609177843434736,
"learning_rate": 1.6171904735287114e-05,
"loss": 0.0,
"step": 595500
},
{
"epoch": 0.9514284596836181,
"grad_norm": 0.00019812423852272332,
"learning_rate": 1.6153147170218062e-05,
"loss": 0.0,
"step": 596000
},
{
"epoch": 0.9522266379216078,
"grad_norm": 0.000553951773326844,
"learning_rate": 1.6134387791005628e-05,
"loss": 0.0,
"step": 596500
},
{
"epoch": 0.9530248161595973,
"grad_norm": 0.00015377177624031901,
"learning_rate": 1.611562662716228e-05,
"loss": 0.0,
"step": 597000
},
{
"epoch": 0.953822994397587,
"grad_norm": 0.00025437449221499264,
"learning_rate": 1.609686370820327e-05,
"loss": 0.0,
"step": 597500
},
{
"epoch": 0.9546211726355766,
"grad_norm": 0.00011710778926499188,
"learning_rate": 1.607809906364662e-05,
"loss": 0.0,
"step": 598000
},
{
"epoch": 0.9554193508735662,
"grad_norm": 0.00018913969688583165,
"learning_rate": 1.6059332723013078e-05,
"loss": 0.0,
"step": 598500
},
{
"epoch": 0.9562175291115558,
"grad_norm": 0.07491962611675262,
"learning_rate": 1.6040564715826045e-05,
"loss": 0.0,
"step": 599000
},
{
"epoch": 0.9570157073495454,
"grad_norm": 0.00014903175178915262,
"learning_rate": 1.6021795071611546e-05,
"loss": 0.0,
"step": 599500
},
{
"epoch": 0.957813885587535,
"grad_norm": 0.0003374506486579776,
"learning_rate": 1.6003023819898188e-05,
"loss": 0.0,
"step": 600000
},
{
"epoch": 0.957813885587535,
"eval_loss": 1.3122987184033263e-05,
"eval_runtime": 22158.8784,
"eval_samples_per_second": 100.515,
"eval_steps_per_second": 3.141,
"step": 600000
},
{
"epoch": 0.9586120638255247,
"grad_norm": 0.002838114043697715,
"learning_rate": 1.5984250990217106e-05,
"loss": 0.0,
"step": 600500
},
{
"epoch": 0.9594102420635142,
"grad_norm": 4460.6826171875,
"learning_rate": 1.596547661210191e-05,
"loss": 0.0,
"step": 601000
},
{
"epoch": 0.9602084203015039,
"grad_norm": 0.00011342266952851787,
"learning_rate": 1.594670071508865e-05,
"loss": 0.0,
"step": 601500
},
{
"epoch": 0.9610065985394934,
"grad_norm": 0.00016833537665661424,
"learning_rate": 1.592792332871578e-05,
"loss": 0.0,
"step": 602000
},
{
"epoch": 0.9618047767774831,
"grad_norm": 0.0022102862130850554,
"learning_rate": 1.5909144482524065e-05,
"loss": 0.0,
"step": 602500
},
{
"epoch": 0.9626029550154727,
"grad_norm": 0.00012462472659535706,
"learning_rate": 1.5890364206056598e-05,
"loss": 0.0,
"step": 603000
},
{
"epoch": 0.9634011332534623,
"grad_norm": 0.00019439893367234617,
"learning_rate": 1.58715825288587e-05,
"loss": 0.0,
"step": 603500
},
{
"epoch": 0.9641993114914519,
"grad_norm": 0.00014579604612663388,
"learning_rate": 1.5852799480477917e-05,
"loss": 0.0,
"step": 604000
},
{
"epoch": 0.9649974897294415,
"grad_norm": 0.0004376780125312507,
"learning_rate": 1.5834015090463934e-05,
"loss": 0.0,
"step": 604500
},
{
"epoch": 0.9657956679674311,
"grad_norm": 0.013489479199051857,
"learning_rate": 1.5815229388368547e-05,
"loss": 0.0,
"step": 605000
},
{
"epoch": 0.9665938462054208,
"grad_norm": 0.0001327054196735844,
"learning_rate": 1.579644240374563e-05,
"loss": 0.0,
"step": 605500
},
{
"epoch": 0.9673920244434103,
"grad_norm": 0.0001604136050445959,
"learning_rate": 1.5777654166151063e-05,
"loss": 0.0,
"step": 606000
},
{
"epoch": 0.9681902026814,
"grad_norm": 0.0035548266023397446,
"learning_rate": 1.5758864705142705e-05,
"loss": 0.0,
"step": 606500
},
{
"epoch": 0.9689883809193895,
"grad_norm": 0.00016837022849358618,
"learning_rate": 1.574007405028033e-05,
"loss": 0.0,
"step": 607000
},
{
"epoch": 0.9697865591573792,
"grad_norm": 0.0028717576060444117,
"learning_rate": 1.5721282231125607e-05,
"loss": 0.0,
"step": 607500
},
{
"epoch": 0.9705847373953688,
"grad_norm": 0.00012833454820793122,
"learning_rate": 1.570248927724201e-05,
"loss": 0.0,
"step": 608000
},
{
"epoch": 0.9713829156333584,
"grad_norm": 0.0005946651217527688,
"learning_rate": 1.5683695218194816e-05,
"loss": 0.0,
"step": 608500
},
{
"epoch": 0.972181093871348,
"grad_norm": 0.00016886359662748873,
"learning_rate": 1.5664900083551052e-05,
"loss": 0.0,
"step": 609000
},
{
"epoch": 0.9729792721093377,
"grad_norm": 0.0061637298204004765,
"learning_rate": 1.564610390287941e-05,
"loss": 0.0,
"step": 609500
},
{
"epoch": 0.9737774503473272,
"grad_norm": 7.434777944581583e-05,
"learning_rate": 1.5627306705750245e-05,
"loss": 0.0,
"step": 610000
},
{
"epoch": 0.9745756285853169,
"grad_norm": 31.525394439697266,
"learning_rate": 1.5608508521735514e-05,
"loss": 0.0,
"step": 610500
},
{
"epoch": 0.9753738068233064,
"grad_norm": 0.0001488685084041208,
"learning_rate": 1.558970938040871e-05,
"loss": 0.0,
"step": 611000
},
{
"epoch": 0.9761719850612961,
"grad_norm": 0.00011100114352302626,
"learning_rate": 1.557090931134484e-05,
"loss": 0.0,
"step": 611500
},
{
"epoch": 0.9769701632992858,
"grad_norm": 0.00014055597421247512,
"learning_rate": 1.5552108344120384e-05,
"loss": 0.0,
"step": 612000
},
{
"epoch": 0.9777683415372753,
"grad_norm": 0.0002197181456722319,
"learning_rate": 1.5533306508313215e-05,
"loss": 0.0,
"step": 612500
},
{
"epoch": 0.978566519775265,
"grad_norm": 0.00010709422349464148,
"learning_rate": 1.5514503833502582e-05,
"loss": 0.0,
"step": 613000
},
{
"epoch": 0.9793646980132545,
"grad_norm": 7.577735232189298e-05,
"learning_rate": 1.5495700349269053e-05,
"loss": 0.0,
"step": 613500
},
{
"epoch": 0.9801628762512442,
"grad_norm": 0.00015610417176503688,
"learning_rate": 1.5476896085194475e-05,
"loss": 0.0,
"step": 614000
},
{
"epoch": 0.9809610544892338,
"grad_norm": 0.012848759070038795,
"learning_rate": 1.5458091070861908e-05,
"loss": 0.0,
"step": 614500
},
{
"epoch": 0.9817592327272234,
"grad_norm": 0.00010452749847900122,
"learning_rate": 1.54392853358556e-05,
"loss": 0.0,
"step": 615000
},
{
"epoch": 0.982557410965213,
"grad_norm": 0.00010649808245943859,
"learning_rate": 1.542047890976094e-05,
"loss": 0.0,
"step": 615500
},
{
"epoch": 0.9833555892032027,
"grad_norm": 0.00017509504687041044,
"learning_rate": 1.5401671822164384e-05,
"loss": 0.0,
"step": 616000
},
{
"epoch": 0.9841537674411922,
"grad_norm": 0.00021298132196534425,
"learning_rate": 1.538286410265346e-05,
"loss": 0.0,
"step": 616500
},
{
"epoch": 0.9849519456791819,
"grad_norm": 0.0001403950882377103,
"learning_rate": 1.5364055780816666e-05,
"loss": 0.0,
"step": 617000
},
{
"epoch": 0.9857501239171714,
"grad_norm": 0.00020762416534125805,
"learning_rate": 1.5345246886243443e-05,
"loss": 0.0,
"step": 617500
},
{
"epoch": 0.9865483021551611,
"grad_norm": 0.0014126452151685953,
"learning_rate": 1.5326437448524157e-05,
"loss": 0.0,
"step": 618000
},
{
"epoch": 0.9873464803931506,
"grad_norm": 0.00011496982915559784,
"learning_rate": 1.5307627497250003e-05,
"loss": 0.0,
"step": 618500
},
{
"epoch": 0.9881446586311403,
"grad_norm": 0.00021757918875664473,
"learning_rate": 1.528881706201301e-05,
"loss": 0.0,
"step": 619000
},
{
"epoch": 0.9889428368691299,
"grad_norm": 0.00021418675896711648,
"learning_rate": 1.5270006172405944e-05,
"loss": 0.0,
"step": 619500
},
{
"epoch": 0.9897410151071195,
"grad_norm": 0.00015302287647500634,
"learning_rate": 1.5251194858022304e-05,
"loss": 0.0,
"step": 620000
},
{
"epoch": 0.9905391933451091,
"grad_norm": 0.00011390163126634434,
"learning_rate": 1.5232383148456244e-05,
"loss": 0.0,
"step": 620500
},
{
"epoch": 0.9913373715830988,
"grad_norm": 0.0001891214051283896,
"learning_rate": 1.5213571073302543e-05,
"loss": 0.0,
"step": 621000
},
{
"epoch": 0.9921355498210883,
"grad_norm": 0.0003676644410006702,
"learning_rate": 1.5194758662156562e-05,
"loss": 0.0,
"step": 621500
},
{
"epoch": 0.992933728059078,
"grad_norm": 0.0007799739250913262,
"learning_rate": 1.5175945944614187e-05,
"loss": 0.0,
"step": 622000
},
{
"epoch": 0.9937319062970675,
"grad_norm": 0.00016387697542086244,
"learning_rate": 1.5157132950271781e-05,
"loss": 0.0,
"step": 622500
},
{
"epoch": 0.9945300845350572,
"grad_norm": 0.00018665984680410475,
"learning_rate": 1.5138319708726152e-05,
"loss": 0.0,
"step": 623000
},
{
"epoch": 0.9953282627730468,
"grad_norm": 0.00018064910545945168,
"learning_rate": 1.5119506249574488e-05,
"loss": 0.0,
"step": 623500
},
{
"epoch": 0.9961264410110364,
"grad_norm": 9.348099410999566e-05,
"learning_rate": 1.510069260241432e-05,
"loss": 0.0,
"step": 624000
},
{
"epoch": 0.996924619249026,
"grad_norm": 9.880396100925282e-05,
"learning_rate": 1.508187879684348e-05,
"loss": 0.0,
"step": 624500
},
{
"epoch": 0.9977227974870156,
"grad_norm": 0.000131562483147718,
"learning_rate": 1.506306486246005e-05,
"loss": 0.0,
"step": 625000
},
{
"epoch": 0.9985209757250052,
"grad_norm": 0.0006260851514525712,
"learning_rate": 1.5044250828862306e-05,
"loss": 0.0,
"step": 625500
},
{
"epoch": 0.9993191539629949,
"grad_norm": 0.00013737600238528103,
"learning_rate": 1.502543672564869e-05,
"loss": 0.0,
"step": 626000
},
{
"epoch": 1.0001173322009844,
"grad_norm": 0.0001540034863865003,
"learning_rate": 1.5006622582417749e-05,
"loss": 0.0,
"step": 626500
},
{
"epoch": 1.000915510438974,
"grad_norm": 0.0002153823006665334,
"learning_rate": 1.4987808428768086e-05,
"loss": 0.0,
"step": 627000
},
{
"epoch": 1.0017136886769638,
"grad_norm": 0.00011496929073473439,
"learning_rate": 1.4968994294298335e-05,
"loss": 0.0,
"step": 627500
},
{
"epoch": 1.0025118669149533,
"grad_norm": 0.0003018941206391901,
"learning_rate": 1.4950180208607091e-05,
"loss": 0.0,
"step": 628000
},
{
"epoch": 1.0033100451529429,
"grad_norm": 0.00031068435055203736,
"learning_rate": 1.493136620129287e-05,
"loss": 0.0,
"step": 628500
},
{
"epoch": 1.0041082233909326,
"grad_norm": 0.54404616355896,
"learning_rate": 1.4912552301954071e-05,
"loss": 0.0,
"step": 629000
},
{
"epoch": 1.0049064016289222,
"grad_norm": 0.002838683081790805,
"learning_rate": 1.4893738540188918e-05,
"loss": 0.0,
"step": 629500
},
{
"epoch": 1.0057045798669118,
"grad_norm": 0.00013099922216497362,
"learning_rate": 1.4874924945595423e-05,
"loss": 0.0,
"step": 630000
},
{
"epoch": 1.0065027581049013,
"grad_norm": 358.38702392578125,
"learning_rate": 1.485611154777133e-05,
"loss": 0.0,
"step": 630500
},
{
"epoch": 1.007300936342891,
"grad_norm": 0.0003976974403485656,
"learning_rate": 1.4837298376314081e-05,
"loss": 0.0,
"step": 631000
},
{
"epoch": 1.0080991145808806,
"grad_norm": 0.0004381372418720275,
"learning_rate": 1.4818485460820754e-05,
"loss": 0.0,
"step": 631500
},
{
"epoch": 1.0088972928188702,
"grad_norm": 0.0007524039829149842,
"learning_rate": 1.4799672830888027e-05,
"loss": 0.0,
"step": 632000
},
{
"epoch": 1.0096954710568598,
"grad_norm": 0.00017187898629345,
"learning_rate": 1.4780860516112132e-05,
"loss": 0.0,
"step": 632500
},
{
"epoch": 1.0104936492948495,
"grad_norm": 0.0002448650193400681,
"learning_rate": 1.4762048546088797e-05,
"loss": 0.0,
"step": 633000
},
{
"epoch": 1.011291827532839,
"grad_norm": 0.0002278346655657515,
"learning_rate": 1.474323695041322e-05,
"loss": 0.0,
"step": 633500
},
{
"epoch": 1.0120900057708286,
"grad_norm": 0.0001101360612665303,
"learning_rate": 1.4724425758679997e-05,
"loss": 0.0,
"step": 634000
},
{
"epoch": 1.0128881840088182,
"grad_norm": 0.00014911351900082082,
"learning_rate": 1.4705615000483101e-05,
"loss": 0.0,
"step": 634500
},
{
"epoch": 1.013686362246808,
"grad_norm": 0.00017947569722309709,
"learning_rate": 1.4686804705415812e-05,
"loss": 0.0,
"step": 635000
},
{
"epoch": 1.0144845404847975,
"grad_norm": 0.0002458689850755036,
"learning_rate": 1.4667994903070683e-05,
"loss": 0.0,
"step": 635500
},
{
"epoch": 1.015282718722787,
"grad_norm": 0.0003903007018379867,
"learning_rate": 1.4649185623039503e-05,
"loss": 0.0,
"step": 636000
},
{
"epoch": 1.0160808969607766,
"grad_norm": 0.00017779128393158317,
"learning_rate": 1.4630376894913225e-05,
"loss": 0.0,
"step": 636500
},
{
"epoch": 1.0168790751987664,
"grad_norm": 0.00012018175766570494,
"learning_rate": 1.4611568748281943e-05,
"loss": 0.0,
"step": 637000
},
{
"epoch": 1.017677253436756,
"grad_norm": 0.0001338824804406613,
"learning_rate": 1.4592761212734836e-05,
"loss": 0.0,
"step": 637500
},
{
"epoch": 1.0184754316747455,
"grad_norm": 0.00010453088179929182,
"learning_rate": 1.4573954317860103e-05,
"loss": 0.0,
"step": 638000
},
{
"epoch": 1.019273609912735,
"grad_norm": 6.280023808358237e-05,
"learning_rate": 1.4555148093244975e-05,
"loss": 0.0,
"step": 638500
},
{
"epoch": 1.0200717881507249,
"grad_norm": 0.0002268562384415418,
"learning_rate": 1.453634256847558e-05,
"loss": 0.0,
"step": 639000
},
{
"epoch": 1.0208699663887144,
"grad_norm": 0.00013508339179679751,
"learning_rate": 1.4517537773136987e-05,
"loss": 0.0,
"step": 639500
},
{
"epoch": 1.021668144626704,
"grad_norm": 0.00012045284529449418,
"learning_rate": 1.449873373681309e-05,
"loss": 0.0,
"step": 640000
},
{
"epoch": 1.0224663228646935,
"grad_norm": 9.69572938629426e-05,
"learning_rate": 1.4479930489086606e-05,
"loss": 0.0,
"step": 640500
},
{
"epoch": 1.0232645011026833,
"grad_norm": 0.00012936844723299146,
"learning_rate": 1.4461128059539004e-05,
"loss": 0.0,
"step": 641000
},
{
"epoch": 1.0240626793406729,
"grad_norm": 0.00017553639190737158,
"learning_rate": 1.4442326477750453e-05,
"loss": 0.0,
"step": 641500
},
{
"epoch": 1.0248608575786624,
"grad_norm": 0.0002106795145664364,
"learning_rate": 1.4423525773299819e-05,
"loss": 0.0,
"step": 642000
},
{
"epoch": 1.025659035816652,
"grad_norm": 0.00018383291899226606,
"learning_rate": 1.4404725975764552e-05,
"loss": 0.0,
"step": 642500
},
{
"epoch": 1.0264572140546417,
"grad_norm": 0.00012423249427229166,
"learning_rate": 1.438592711472071e-05,
"loss": 0.0,
"step": 643000
},
{
"epoch": 1.0272553922926313,
"grad_norm": 0.00032292716787196696,
"learning_rate": 1.4367129219742846e-05,
"loss": 0.0,
"step": 643500
},
{
"epoch": 1.0280535705306209,
"grad_norm": 0.00022273634385783225,
"learning_rate": 1.4348332320404026e-05,
"loss": 0.0,
"step": 644000
},
{
"epoch": 1.0288517487686106,
"grad_norm": 0.0002325259702047333,
"learning_rate": 1.4329536446275714e-05,
"loss": 0.0,
"step": 644500
},
{
"epoch": 1.0296499270066002,
"grad_norm": 0.03865138441324234,
"learning_rate": 1.4310741626927782e-05,
"loss": 0.0,
"step": 645000
},
{
"epoch": 1.0304481052445897,
"grad_norm": 0.0013154677581042051,
"learning_rate": 1.4291947891928453e-05,
"loss": 0.0,
"step": 645500
},
{
"epoch": 1.0312462834825793,
"grad_norm": 0.00014797232870478183,
"learning_rate": 1.427315527084421e-05,
"loss": 0.0,
"step": 646000
},
{
"epoch": 1.032044461720569,
"grad_norm": 1216.7861328125,
"learning_rate": 1.4254363793239825e-05,
"loss": 0.0,
"step": 646500
},
{
"epoch": 1.0328426399585586,
"grad_norm": 7.254226511577144e-05,
"learning_rate": 1.4235573488678238e-05,
"loss": 0.0,
"step": 647000
},
{
"epoch": 1.0336408181965482,
"grad_norm": 0.0005647933576256037,
"learning_rate": 1.4216784386720553e-05,
"loss": 0.0,
"step": 647500
},
{
"epoch": 1.0344389964345377,
"grad_norm": 0.0017171679064631462,
"learning_rate": 1.419799651692599e-05,
"loss": 0.0,
"step": 648000
},
{
"epoch": 1.0352371746725275,
"grad_norm": 0.00015221527428366244,
"learning_rate": 1.4179209908851827e-05,
"loss": 0.0,
"step": 648500
},
{
"epoch": 1.036035352910517,
"grad_norm": 0.0007889217813499272,
"learning_rate": 1.4160424592053353e-05,
"loss": 0.0,
"step": 649000
},
{
"epoch": 1.0368335311485066,
"grad_norm": 0.0001243455772055313,
"learning_rate": 1.4141640596083822e-05,
"loss": 0.0,
"step": 649500
},
{
"epoch": 1.0376317093864962,
"grad_norm": 0.0001685485476627946,
"learning_rate": 1.4122857950494433e-05,
"loss": 0.0,
"step": 650000
},
{
"epoch": 1.038429887624486,
"grad_norm": 0.0006130459951236844,
"learning_rate": 1.4104076684834227e-05,
"loss": 0.0,
"step": 650500
},
{
"epoch": 1.0392280658624755,
"grad_norm": 0.0002899725513998419,
"learning_rate": 1.4085296828650094e-05,
"loss": 0.0,
"step": 651000
},
{
"epoch": 1.040026244100465,
"grad_norm": 0.00017135177040472627,
"learning_rate": 1.406651841148671e-05,
"loss": 0.0,
"step": 651500
},
{
"epoch": 1.0408244223384546,
"grad_norm": 0.00029842773801647127,
"learning_rate": 1.404774146288647e-05,
"loss": 0.0,
"step": 652000
},
{
"epoch": 1.0416226005764444,
"grad_norm": 0.00023503368720412254,
"learning_rate": 1.4028966012389477e-05,
"loss": 0.0,
"step": 652500
},
{
"epoch": 1.042420778814434,
"grad_norm": 0.00017431171727366745,
"learning_rate": 1.4010192089533466e-05,
"loss": 0.0,
"step": 653000
},
{
"epoch": 1.0432189570524235,
"grad_norm": 0.00014720844046678394,
"learning_rate": 1.3991419723853775e-05,
"loss": 0.0,
"step": 653500
},
{
"epoch": 1.044017135290413,
"grad_norm": 0.00020027408027090132,
"learning_rate": 1.3972648944883288e-05,
"loss": 0.0,
"step": 654000
},
{
"epoch": 1.0448153135284028,
"grad_norm": 0.0001721490261843428,
"learning_rate": 1.3953879782152388e-05,
"loss": 0.0,
"step": 654500
},
{
"epoch": 1.0456134917663924,
"grad_norm": 0.00016651615442242473,
"learning_rate": 1.393511226518893e-05,
"loss": 0.0,
"step": 655000
},
{
"epoch": 1.046411670004382,
"grad_norm": 0.00025675626238808036,
"learning_rate": 1.3916346423518161e-05,
"loss": 0.0,
"step": 655500
},
{
"epoch": 1.0472098482423715,
"grad_norm": 0.0014689648523926735,
"learning_rate": 1.3897582286662714e-05,
"loss": 0.0,
"step": 656000
},
{
"epoch": 1.0480080264803613,
"grad_norm": 0.00023966317530721426,
"learning_rate": 1.387881988414252e-05,
"loss": 0.0,
"step": 656500
},
{
"epoch": 1.0488062047183508,
"grad_norm": 0.00015946978237479925,
"learning_rate": 1.3860059245474792e-05,
"loss": 0.0,
"step": 657000
},
{
"epoch": 1.0496043829563404,
"grad_norm": 0.0002671683905646205,
"learning_rate": 1.3841300400173968e-05,
"loss": 0.0,
"step": 657500
},
{
"epoch": 1.0504025611943302,
"grad_norm": 0.0008995188982225955,
"learning_rate": 1.3822543377751657e-05,
"loss": 0.0,
"step": 658000
},
{
"epoch": 1.0512007394323197,
"grad_norm": 0.00017635516996961087,
"learning_rate": 1.3803788207716616e-05,
"loss": 0.0,
"step": 658500
},
{
"epoch": 1.0519989176703093,
"grad_norm": 9.415735985385254e-05,
"learning_rate": 1.3785034919574666e-05,
"loss": 0.0,
"step": 659000
},
{
"epoch": 1.0527970959082988,
"grad_norm": 0.00011075485235778615,
"learning_rate": 1.376628354282869e-05,
"loss": 0.0,
"step": 659500
},
{
"epoch": 1.0535952741462886,
"grad_norm": 0.00012287896242924035,
"learning_rate": 1.3747534106978547e-05,
"loss": 0.0,
"step": 660000
},
{
"epoch": 1.0543934523842782,
"grad_norm": 0.00021028977062087506,
"learning_rate": 1.3728786641521046e-05,
"loss": 0.0,
"step": 660500
},
{
"epoch": 1.0551916306222677,
"grad_norm": 0.00026298430748283863,
"learning_rate": 1.3710041175949905e-05,
"loss": 0.0,
"step": 661000
},
{
"epoch": 1.0559898088602573,
"grad_norm": 0.00010695758101064712,
"learning_rate": 1.3691297739755685e-05,
"loss": 0.0,
"step": 661500
},
{
"epoch": 1.056787987098247,
"grad_norm": 0.00011361335054971278,
"learning_rate": 1.3672556362425764e-05,
"loss": 0.0,
"step": 662000
},
{
"epoch": 1.0575861653362366,
"grad_norm": 0.00017494260100647807,
"learning_rate": 1.3653817073444268e-05,
"loss": 0.0,
"step": 662500
},
{
"epoch": 1.0583843435742262,
"grad_norm": 0.0002041016996372491,
"learning_rate": 1.3635079902292054e-05,
"loss": 0.0,
"step": 663000
},
{
"epoch": 1.0591825218122157,
"grad_norm": 0.0003808980109170079,
"learning_rate": 1.3616344878446634e-05,
"loss": 0.0,
"step": 663500
},
{
"epoch": 1.0599807000502055,
"grad_norm": 0.00014107560855336487,
"learning_rate": 1.3597612031382143e-05,
"loss": 0.0,
"step": 664000
},
{
"epoch": 1.060778878288195,
"grad_norm": 0.00017715631111059338,
"learning_rate": 1.3578881390569305e-05,
"loss": 0.0,
"step": 664500
},
{
"epoch": 1.0615770565261846,
"grad_norm": 0.000133657013066113,
"learning_rate": 1.3560152985475353e-05,
"loss": 0.0,
"step": 665000
},
{
"epoch": 1.0623752347641742,
"grad_norm": 0.0020608582999557257,
"learning_rate": 1.354142684556402e-05,
"loss": 0.0,
"step": 665500
},
{
"epoch": 1.063173413002164,
"grad_norm": 9.932918328559026e-05,
"learning_rate": 1.3522703000295465e-05,
"loss": 0.0,
"step": 666000
},
{
"epoch": 1.0639715912401535,
"grad_norm": 0.00020408285490702838,
"learning_rate": 1.3503981479126238e-05,
"loss": 0.0,
"step": 666500
},
{
"epoch": 1.064769769478143,
"grad_norm": 0.00017636266420595348,
"learning_rate": 1.348526231150924e-05,
"loss": 0.0,
"step": 667000
},
{
"epoch": 1.0655679477161326,
"grad_norm": 0.005270059686154127,
"learning_rate": 1.3466545526893657e-05,
"loss": 0.0,
"step": 667500
},
{
"epoch": 1.0663661259541224,
"grad_norm": 0.00017116556409746408,
"learning_rate": 1.3447831154724944e-05,
"loss": 0.0,
"step": 668000
},
{
"epoch": 1.067164304192112,
"grad_norm": 0.00020884806872345507,
"learning_rate": 1.342911922444474e-05,
"loss": 0.0,
"step": 668500
},
{
"epoch": 1.0679624824301015,
"grad_norm": 0.00016449626127723604,
"learning_rate": 1.341040976549086e-05,
"loss": 0.0,
"step": 669000
},
{
"epoch": 1.068760660668091,
"grad_norm": 0.00012128291564295068,
"learning_rate": 1.3391702807297222e-05,
"loss": 0.0,
"step": 669500
},
{
"epoch": 1.0695588389060808,
"grad_norm": 0.0004791721876244992,
"learning_rate": 1.33729983792938e-05,
"loss": 0.0,
"step": 670000
},
{
"epoch": 1.0703570171440704,
"grad_norm": 0.0005541092832572758,
"learning_rate": 1.3354296510906615e-05,
"loss": 0.0,
"step": 670500
},
{
"epoch": 1.07115519538206,
"grad_norm": 0.22373533248901367,
"learning_rate": 1.3335597231557637e-05,
"loss": 0.0,
"step": 671000
},
{
"epoch": 1.0719533736200495,
"grad_norm": 0.00018599227769300342,
"learning_rate": 1.3316900570664773e-05,
"loss": 0.0,
"step": 671500
},
{
"epoch": 1.0727515518580393,
"grad_norm": 0.023390674963593483,
"learning_rate": 1.3298206557641807e-05,
"loss": 0.0,
"step": 672000
},
{
"epoch": 1.0735497300960288,
"grad_norm": 0.0001526430423837155,
"learning_rate": 1.327951522189836e-05,
"loss": 0.0,
"step": 672500
},
{
"epoch": 1.0743479083340184,
"grad_norm": 0.0005466815200634301,
"learning_rate": 1.3260826592839843e-05,
"loss": 0.0,
"step": 673000
},
{
"epoch": 1.075146086572008,
"grad_norm": 0.0001984064292628318,
"learning_rate": 1.3242140699867394e-05,
"loss": 0.0,
"step": 673500
},
{
"epoch": 1.0759442648099977,
"grad_norm": 0.0001757918653311208,
"learning_rate": 1.3223457572377876e-05,
"loss": 0.0,
"step": 674000
},
{
"epoch": 1.0767424430479873,
"grad_norm": 0.00047943569370545447,
"learning_rate": 1.3204777239763763e-05,
"loss": 0.0,
"step": 674500
},
{
"epoch": 1.0775406212859768,
"grad_norm": 0.00018678620108403265,
"learning_rate": 1.3186099731413175e-05,
"loss": 0.0,
"step": 675000
},
{
"epoch": 1.0783387995239666,
"grad_norm": 0.0001615070941625163,
"learning_rate": 1.316742507670975e-05,
"loss": 0.0,
"step": 675500
},
{
"epoch": 1.0791369777619562,
"grad_norm": 0.00011800303036579862,
"learning_rate": 1.3148753305032651e-05,
"loss": 0.0,
"step": 676000
},
{
"epoch": 1.0799351559999457,
"grad_norm": 0.00014160935825202614,
"learning_rate": 1.3130084445756528e-05,
"loss": 0.0,
"step": 676500
},
{
"epoch": 1.0807333342379353,
"grad_norm": 0.0001385206269333139,
"learning_rate": 1.3111418528251405e-05,
"loss": 0.0,
"step": 677000
},
{
"epoch": 1.081531512475925,
"grad_norm": 0.0001729640061967075,
"learning_rate": 1.309275558188272e-05,
"loss": 0.0,
"step": 677500
},
{
"epoch": 1.0823296907139146,
"grad_norm": 0.0074539934284985065,
"learning_rate": 1.3074095636011201e-05,
"loss": 0.0,
"step": 678000
},
{
"epoch": 1.0831278689519042,
"grad_norm": 0.0001781294122338295,
"learning_rate": 1.3055438719992892e-05,
"loss": 0.0,
"step": 678500
},
{
"epoch": 1.0839260471898937,
"grad_norm": 0.00014573968655895442,
"learning_rate": 1.3036784863179042e-05,
"loss": 0.0,
"step": 679000
},
{
"epoch": 1.0847242254278835,
"grad_norm": 0.006077378056943417,
"learning_rate": 1.301813409491609e-05,
"loss": 0.0,
"step": 679500
},
{
"epoch": 1.085522403665873,
"grad_norm": 0.2760709524154663,
"learning_rate": 1.2999486444545635e-05,
"loss": 0.0,
"step": 680000
},
{
"epoch": 1.0863205819038626,
"grad_norm": 0.006627124268561602,
"learning_rate": 1.2980841941404345e-05,
"loss": 0.0,
"step": 680500
},
{
"epoch": 1.0871187601418522,
"grad_norm": 0.0002801245136652142,
"learning_rate": 1.2962200614823972e-05,
"loss": 0.0,
"step": 681000
},
{
"epoch": 1.087916938379842,
"grad_norm": 0.00034790916834026575,
"learning_rate": 1.2943562494131222e-05,
"loss": 0.0,
"step": 681500
},
{
"epoch": 1.0887151166178315,
"grad_norm": 0.00035252582165412605,
"learning_rate": 1.2924927608647807e-05,
"loss": 0.0,
"step": 682000
},
{
"epoch": 1.089513294855821,
"grad_norm": 0.000455355504527688,
"learning_rate": 1.2906295987690317e-05,
"loss": 0.0,
"step": 682500
},
{
"epoch": 1.0903114730938106,
"grad_norm": 0.0007876930758357048,
"learning_rate": 1.2887667660570213e-05,
"loss": 0.0,
"step": 683000
},
{
"epoch": 1.0911096513318004,
"grad_norm": 0.00048001037794165313,
"learning_rate": 1.2869042656593782e-05,
"loss": 0.0,
"step": 683500
},
{
"epoch": 1.09190782956979,
"grad_norm": 0.0004877329047303647,
"learning_rate": 1.2850421005062076e-05,
"loss": 0.0,
"step": 684000
},
{
"epoch": 1.0927060078077795,
"grad_norm": 0.00021993753034621477,
"learning_rate": 1.2831802735270879e-05,
"loss": 0.0,
"step": 684500
},
{
"epoch": 1.093504186045769,
"grad_norm": 0.0003302933764643967,
"learning_rate": 1.2813187876510645e-05,
"loss": 0.0,
"step": 685000
},
{
"epoch": 1.0943023642837588,
"grad_norm": 1113.2105712890625,
"learning_rate": 1.2794576458066469e-05,
"loss": 0.0,
"step": 685500
},
{
"epoch": 1.0951005425217484,
"grad_norm": 0.0002000959066208452,
"learning_rate": 1.2775968509218036e-05,
"loss": 0.0,
"step": 686000
},
{
"epoch": 1.095898720759738,
"grad_norm": 0.00015401170821860433,
"learning_rate": 1.2757364059239562e-05,
"loss": 0.0,
"step": 686500
},
{
"epoch": 1.0966968989977275,
"grad_norm": 0.0001665474846959114,
"learning_rate": 1.2738763137399772e-05,
"loss": 0.0,
"step": 687000
},
{
"epoch": 1.0974950772357173,
"grad_norm": 0.00014819370699115098,
"learning_rate": 1.2720165772961828e-05,
"loss": 0.0,
"step": 687500
},
{
"epoch": 1.0982932554737068,
"grad_norm": 0.00019879864703398198,
"learning_rate": 1.270157199518331e-05,
"loss": 0.0,
"step": 688000
},
{
"epoch": 1.0990914337116964,
"grad_norm": 0.00024383983691222966,
"learning_rate": 1.2682981833316138e-05,
"loss": 0.0,
"step": 688500
},
{
"epoch": 1.0998896119496862,
"grad_norm": 0.0001553635229356587,
"learning_rate": 1.2664395316606553e-05,
"loss": 0.0,
"step": 689000
},
{
"epoch": 1.1006877901876757,
"grad_norm": 0.04126366972923279,
"learning_rate": 1.2645812474295068e-05,
"loss": 0.0,
"step": 689500
},
{
"epoch": 1.1014859684256653,
"grad_norm": 0.0001506950065959245,
"learning_rate": 1.2627233335616397e-05,
"loss": 0.0,
"step": 690000
},
{
"epoch": 1.1022841466636548,
"grad_norm": 0.00024802706320770085,
"learning_rate": 1.260865792979945e-05,
"loss": 0.0,
"step": 690500
},
{
"epoch": 1.1030823249016444,
"grad_norm": 0.00022820830054115504,
"learning_rate": 1.259008628606724e-05,
"loss": 0.0,
"step": 691000
},
{
"epoch": 1.1038805031396342,
"grad_norm": 0.00021335756173357368,
"learning_rate": 1.2571518433636885e-05,
"loss": 0.0,
"step": 691500
},
{
"epoch": 1.1046786813776237,
"grad_norm": 0.00015809416072443128,
"learning_rate": 1.2552954401719521e-05,
"loss": 0.0,
"step": 692000
},
{
"epoch": 1.1054768596156133,
"grad_norm": 0.00010023624054156244,
"learning_rate": 1.2534394219520282e-05,
"loss": 0.0,
"step": 692500
},
{
"epoch": 1.106275037853603,
"grad_norm": 0.0005553970113396645,
"learning_rate": 1.2515837916238249e-05,
"loss": 0.0,
"step": 693000
},
{
"epoch": 1.1070732160915926,
"grad_norm": 0.00018230122805107385,
"learning_rate": 1.2497285521066384e-05,
"loss": 0.0,
"step": 693500
},
{
"epoch": 1.1078713943295821,
"grad_norm": 0.00017253353144042194,
"learning_rate": 1.2478737063191525e-05,
"loss": 0.0,
"step": 694000
},
{
"epoch": 1.1086695725675717,
"grad_norm": 0.00014320742047857493,
"learning_rate": 1.2460192571794297e-05,
"loss": 0.0,
"step": 694500
},
{
"epoch": 1.1094677508055615,
"grad_norm": 0.0002893557248171419,
"learning_rate": 1.2441652076049085e-05,
"loss": 0.0,
"step": 695000
},
{
"epoch": 1.110265929043551,
"grad_norm": 0.0001792061812011525,
"learning_rate": 1.2423115605124003e-05,
"loss": 0.0,
"step": 695500
},
{
"epoch": 1.1110641072815406,
"grad_norm": 0.00013314814714249223,
"learning_rate": 1.2404583188180819e-05,
"loss": 0.0,
"step": 696000
},
{
"epoch": 1.1118622855195301,
"grad_norm": 0.037688203155994415,
"learning_rate": 1.2386054854374931e-05,
"loss": 0.0,
"step": 696500
},
{
"epoch": 1.11266046375752,
"grad_norm": 0.009215892292559147,
"learning_rate": 1.2367530632855307e-05,
"loss": 0.0,
"step": 697000
},
{
"epoch": 1.1134586419955095,
"grad_norm": 0.00022860315220896155,
"learning_rate": 1.2349010552764452e-05,
"loss": 0.0,
"step": 697500
},
{
"epoch": 1.114256820233499,
"grad_norm": 0.00016303629672620445,
"learning_rate": 1.2330494643238355e-05,
"loss": 0.0,
"step": 698000
},
{
"epoch": 1.1150549984714886,
"grad_norm": 0.00031790099455974996,
"learning_rate": 1.2311982933406434e-05,
"loss": 0.0,
"step": 698500
},
{
"epoch": 1.1158531767094784,
"grad_norm": 0.040996525436639786,
"learning_rate": 1.2293475452391517e-05,
"loss": 0.0,
"step": 699000
},
{
"epoch": 1.116651354947468,
"grad_norm": 0.00023965245054569095,
"learning_rate": 1.2274972229309758e-05,
"loss": 0.0,
"step": 699500
},
{
"epoch": 1.1174495331854575,
"grad_norm": 0.00019742768199648708,
"learning_rate": 1.2256473293270635e-05,
"loss": 0.0,
"step": 700000
},
{
"epoch": 1.118247711423447,
"grad_norm": 0.000235430255997926,
"learning_rate": 1.2237978673376863e-05,
"loss": 0.0,
"step": 700500
},
{
"epoch": 1.1190458896614368,
"grad_norm": 0.0002709669934120029,
"learning_rate": 1.2219488398724383e-05,
"loss": 0.0,
"step": 701000
},
{
"epoch": 1.1198440678994264,
"grad_norm": 0.00020392390433698893,
"learning_rate": 1.2201002498402283e-05,
"loss": 0.0,
"step": 701500
},
{
"epoch": 1.120642246137416,
"grad_norm": 0.00025341068976558745,
"learning_rate": 1.218252100149278e-05,
"loss": 0.0,
"step": 702000
},
{
"epoch": 1.1214404243754055,
"grad_norm": 0.00020573250367306173,
"learning_rate": 1.2164043937071166e-05,
"loss": 0.0,
"step": 702500
},
{
"epoch": 1.1222386026133953,
"grad_norm": 0.00016997568309307098,
"learning_rate": 1.2145571334205747e-05,
"loss": 0.0,
"step": 703000
},
{
"epoch": 1.1230367808513848,
"grad_norm": 0.001076328568160534,
"learning_rate": 1.2127103221957824e-05,
"loss": 0.0,
"step": 703500
},
{
"epoch": 1.1238349590893744,
"grad_norm": 0.0008634846308268607,
"learning_rate": 1.210863962938163e-05,
"loss": 0.0,
"step": 704000
},
{
"epoch": 1.124633137327364,
"grad_norm": 0.00017522821144666523,
"learning_rate": 1.2090180585524273e-05,
"loss": 0.0,
"step": 704500
},
{
"epoch": 1.1254313155653537,
"grad_norm": 0.0005584707832895219,
"learning_rate": 1.2071726119425731e-05,
"loss": 0.0,
"step": 705000
},
{
"epoch": 1.1262294938033433,
"grad_norm": 0.00016125263937283307,
"learning_rate": 1.205327626011875e-05,
"loss": 0.0,
"step": 705500
},
{
"epoch": 1.1270276720413328,
"grad_norm": 0.00014472042676061392,
"learning_rate": 1.2034831036628866e-05,
"loss": 0.0,
"step": 706000
},
{
"epoch": 1.1278258502793226,
"grad_norm": 0.0004964773543179035,
"learning_rate": 1.2016390477974277e-05,
"loss": 0.0,
"step": 706500
},
{
"epoch": 1.1286240285173121,
"grad_norm": 0.00017839822976384312,
"learning_rate": 1.1997954613165885e-05,
"loss": 0.0,
"step": 707000
},
{
"epoch": 1.1294222067553017,
"grad_norm": 0.00019834449631161988,
"learning_rate": 1.1979523471207184e-05,
"loss": 0.0,
"step": 707500
},
{
"epoch": 1.1302203849932912,
"grad_norm": 0.00014970562187954783,
"learning_rate": 1.196109708109423e-05,
"loss": 0.0,
"step": 708000
},
{
"epoch": 1.1310185632312808,
"grad_norm": 0.00013826471695210785,
"learning_rate": 1.194267547181563e-05,
"loss": 0.0,
"step": 708500
},
{
"epoch": 1.1318167414692706,
"grad_norm": 0.0002837497158907354,
"learning_rate": 1.1924258672352443e-05,
"loss": 0.0,
"step": 709000
},
{
"epoch": 1.1326149197072601,
"grad_norm": 0.09026394784450531,
"learning_rate": 1.190584671167819e-05,
"loss": 0.0,
"step": 709500
},
{
"epoch": 1.1334130979452497,
"grad_norm": 0.0003425665490794927,
"learning_rate": 1.1887439618758744e-05,
"loss": 0.0,
"step": 710000
},
{
"epoch": 1.1342112761832395,
"grad_norm": 0.0004137590294703841,
"learning_rate": 1.186903742255236e-05,
"loss": 0.0,
"step": 710500
},
{
"epoch": 1.135009454421229,
"grad_norm": 0.0014002566458657384,
"learning_rate": 1.1850640152009552e-05,
"loss": 0.0,
"step": 711000
},
{
"epoch": 1.1358076326592186,
"grad_norm": 0.00042761804070323706,
"learning_rate": 1.18322478360731e-05,
"loss": 0.0,
"step": 711500
},
{
"epoch": 1.1366058108972081,
"grad_norm": 0.0002522016875445843,
"learning_rate": 1.1813860503678006e-05,
"loss": 0.0,
"step": 712000
},
{
"epoch": 1.137403989135198,
"grad_norm": 0.0008443945553153753,
"learning_rate": 1.17954781837514e-05,
"loss": 0.0,
"step": 712500
},
{
"epoch": 1.1382021673731875,
"grad_norm": 0.018911859020590782,
"learning_rate": 1.1777100905212562e-05,
"loss": 0.0,
"step": 713000
},
{
"epoch": 1.139000345611177,
"grad_norm": 0.00022410589735955,
"learning_rate": 1.1758728696972803e-05,
"loss": 0.0,
"step": 713500
},
{
"epoch": 1.1397985238491666,
"grad_norm": 0.00019726462778635323,
"learning_rate": 1.1740361587935484e-05,
"loss": 0.0,
"step": 714000
},
{
"epoch": 1.1405967020871564,
"grad_norm": 0.00034632792812772095,
"learning_rate": 1.1721999606995939e-05,
"loss": 0.0,
"step": 714500
},
{
"epoch": 1.141394880325146,
"grad_norm": 0.0001660481939325109,
"learning_rate": 1.1703642783041423e-05,
"loss": 0.0,
"step": 715000
},
{
"epoch": 1.1421930585631355,
"grad_norm": 0.00020775549637619406,
"learning_rate": 1.1685291144951097e-05,
"loss": 0.0,
"step": 715500
},
{
"epoch": 1.142991236801125,
"grad_norm": 0.00017729878891259432,
"learning_rate": 1.166694472159594e-05,
"loss": 0.0,
"step": 716000
},
{
"epoch": 1.1437894150391148,
"grad_norm": 0.00020900214440189302,
"learning_rate": 1.1648603541838759e-05,
"loss": 0.0,
"step": 716500
},
{
"epoch": 1.1445875932771044,
"grad_norm": 0.00021771031606476754,
"learning_rate": 1.1630267634534078e-05,
"loss": 0.0,
"step": 717000
},
{
"epoch": 1.145385771515094,
"grad_norm": 0.000251882360316813,
"learning_rate": 1.161193702852814e-05,
"loss": 0.0,
"step": 717500
},
{
"epoch": 1.1461839497530835,
"grad_norm": 0.00020855554612353444,
"learning_rate": 1.1593611752658857e-05,
"loss": 0.0,
"step": 718000
},
{
"epoch": 1.1469821279910732,
"grad_norm": 0.00013053267321083695,
"learning_rate": 1.1575291835755743e-05,
"loss": 0.0,
"step": 718500
},
{
"epoch": 1.1477803062290628,
"grad_norm": 0.00018806445586960763,
"learning_rate": 1.155697730663989e-05,
"loss": 0.0,
"step": 719000
},
{
"epoch": 1.1485784844670524,
"grad_norm": 0.00016325576871167868,
"learning_rate": 1.1538668194123901e-05,
"loss": 0.0,
"step": 719500
},
{
"epoch": 1.1493766627050421,
"grad_norm": 0.00024883818696253,
"learning_rate": 1.152036452701188e-05,
"loss": 0.0,
"step": 720000
},
{
"epoch": 1.1501748409430317,
"grad_norm": 0.0001439152838429436,
"learning_rate": 1.1502066334099339e-05,
"loss": 0.0,
"step": 720500
},
{
"epoch": 1.1509730191810212,
"grad_norm": 0.010649287141859531,
"learning_rate": 1.1483773644173191e-05,
"loss": 0.0,
"step": 721000
},
{
"epoch": 1.1517711974190108,
"grad_norm": 0.0003836087416857481,
"learning_rate": 1.1465486486011695e-05,
"loss": 0.0,
"step": 721500
},
{
"epoch": 1.1525693756570004,
"grad_norm": 0.00016571102605666965,
"learning_rate": 1.1447204888384396e-05,
"loss": 0.0,
"step": 722000
},
{
"epoch": 1.1533675538949901,
"grad_norm": 0.003242659382522106,
"learning_rate": 1.1428928880052106e-05,
"loss": 0.0,
"step": 722500
},
{
"epoch": 1.1541657321329797,
"grad_norm": 0.0001464882370783016,
"learning_rate": 1.141065848976683e-05,
"loss": 0.0,
"step": 723000
},
{
"epoch": 1.1549639103709692,
"grad_norm": 0.0001437840110156685,
"learning_rate": 1.1392393746271738e-05,
"loss": 0.0,
"step": 723500
},
{
"epoch": 1.155762088608959,
"grad_norm": 9.78041862254031e-05,
"learning_rate": 1.1374134678301124e-05,
"loss": 0.0,
"step": 724000
},
{
"epoch": 1.1565602668469486,
"grad_norm": 0.01296873390674591,
"learning_rate": 1.1355881314580341e-05,
"loss": 0.0,
"step": 724500
},
{
"epoch": 1.1573584450849381,
"grad_norm": 0.000577951839659363,
"learning_rate": 1.1337633683825783e-05,
"loss": 0.0,
"step": 725000
},
{
"epoch": 1.1581566233229277,
"grad_norm": 0.0002495780645404011,
"learning_rate": 1.1319391814744808e-05,
"loss": 0.0,
"step": 725500
},
{
"epoch": 1.1589548015609175,
"grad_norm": 0.00020137692627031356,
"learning_rate": 1.1301155736035725e-05,
"loss": 0.0,
"step": 726000
},
{
"epoch": 1.159752979798907,
"grad_norm": 0.0002284547663293779,
"learning_rate": 1.1282925476387725e-05,
"loss": 0.0,
"step": 726500
},
{
"epoch": 1.1605511580368966,
"grad_norm": 0.0001997579965973273,
"learning_rate": 1.1264701064480842e-05,
"loss": 0.0,
"step": 727000
},
{
"epoch": 1.1613493362748861,
"grad_norm": 0.00020096104708500206,
"learning_rate": 1.1246482528985918e-05,
"loss": 0.0,
"step": 727500
},
{
"epoch": 1.162147514512876,
"grad_norm": 0.0002553035446908325,
"learning_rate": 1.1228269898564545e-05,
"loss": 0.0,
"step": 728000
},
{
"epoch": 1.1629456927508655,
"grad_norm": 0.00024139387824106961,
"learning_rate": 1.121006320186903e-05,
"loss": 0.0,
"step": 728500
},
{
"epoch": 1.163743870988855,
"grad_norm": 0.0002338308549951762,
"learning_rate": 1.1191862467542337e-05,
"loss": 0.0,
"step": 729000
},
{
"epoch": 1.1645420492268446,
"grad_norm": 0.00048753820010460913,
"learning_rate": 1.117366772421806e-05,
"loss": 0.0,
"step": 729500
},
{
"epoch": 1.1653402274648343,
"grad_norm": 0.00029974625795148313,
"learning_rate": 1.1155479000520359e-05,
"loss": 0.0,
"step": 730000
},
{
"epoch": 1.166138405702824,
"grad_norm": 0.0002745148085523397,
"learning_rate": 1.1137296325063923e-05,
"loss": 0.0,
"step": 730500
},
{
"epoch": 1.1669365839408135,
"grad_norm": 0.0003195735043846071,
"learning_rate": 1.1119119726453938e-05,
"loss": 0.0,
"step": 731000
},
{
"epoch": 1.167734762178803,
"grad_norm": 0.0003031744563486427,
"learning_rate": 1.1100949233286018e-05,
"loss": 0.0,
"step": 731500
},
{
"epoch": 1.1685329404167928,
"grad_norm": 0.0005830961745232344,
"learning_rate": 1.1082784874146175e-05,
"loss": 0.0,
"step": 732000
},
{
"epoch": 1.1693311186547823,
"grad_norm": 0.0003262453246861696,
"learning_rate": 1.1064626677610778e-05,
"loss": 0.0,
"step": 732500
},
{
"epoch": 1.170129296892772,
"grad_norm": 0.00045430276077240705,
"learning_rate": 1.1046474672246483e-05,
"loss": 0.0,
"step": 733000
},
{
"epoch": 1.1709274751307617,
"grad_norm": 0.0004028049297630787,
"learning_rate": 1.1028328886610229e-05,
"loss": 0.0,
"step": 733500
},
{
"epoch": 1.1717256533687512,
"grad_norm": 0.00026213665842078626,
"learning_rate": 1.101018934924915e-05,
"loss": 0.0,
"step": 734000
},
{
"epoch": 1.1725238316067408,
"grad_norm": 0.00020008819410577416,
"learning_rate": 1.099205608870057e-05,
"loss": 0.0,
"step": 734500
},
{
"epoch": 1.1733220098447303,
"grad_norm": 0.0004212880157865584,
"learning_rate": 1.0973929133491912e-05,
"loss": 0.0,
"step": 735000
},
{
"epoch": 1.17412018808272,
"grad_norm": 0.00021266612748149782,
"learning_rate": 1.0955808512140709e-05,
"loss": 0.0,
"step": 735500
},
{
"epoch": 1.1749183663207097,
"grad_norm": 0.00032644724706187844,
"learning_rate": 1.093769425315451e-05,
"loss": 0.0,
"step": 736000
},
{
"epoch": 1.1757165445586992,
"grad_norm": 0.09483543783426285,
"learning_rate": 1.0919586385030849e-05,
"loss": 0.0,
"step": 736500
},
{
"epoch": 1.1765147227966888,
"grad_norm": 0.00034892070107162,
"learning_rate": 1.0901484936257235e-05,
"loss": 0.0,
"step": 737000
},
{
"epoch": 1.1773129010346786,
"grad_norm": 0.00021350174210965633,
"learning_rate": 1.0883389935311041e-05,
"loss": 0.0,
"step": 737500
},
{
"epoch": 1.1781110792726681,
"grad_norm": 0.00020187548943795264,
"learning_rate": 1.086530141065953e-05,
"loss": 0.0,
"step": 738000
},
{
"epoch": 1.1789092575106577,
"grad_norm": 0.0004036907048430294,
"learning_rate": 1.0847219390759752e-05,
"loss": 0.0,
"step": 738500
},
{
"epoch": 1.1797074357486472,
"grad_norm": 0.0001676503597991541,
"learning_rate": 1.082914390405854e-05,
"loss": 0.0,
"step": 739000
},
{
"epoch": 1.1805056139866368,
"grad_norm": 0.0003050376835744828,
"learning_rate": 1.0811074978992437e-05,
"loss": 0.0,
"step": 739500
},
{
"epoch": 1.1813037922246266,
"grad_norm": 0.0001976622297661379,
"learning_rate": 1.0793012643987662e-05,
"loss": 0.0,
"step": 740000
},
{
"epoch": 1.1821019704626161,
"grad_norm": 0.00024282137746922672,
"learning_rate": 1.0774956927460085e-05,
"loss": 0.0,
"step": 740500
},
{
"epoch": 1.1829001487006057,
"grad_norm": 0.00012517427967395633,
"learning_rate": 1.0756907857815136e-05,
"loss": 0.0,
"step": 741000
},
{
"epoch": 1.1836983269385954,
"grad_norm": 0.00019520529895089567,
"learning_rate": 1.0738865463447822e-05,
"loss": 0.0,
"step": 741500
},
{
"epoch": 1.184496505176585,
"grad_norm": 0.0006624461966566741,
"learning_rate": 1.0720829772742615e-05,
"loss": 0.0,
"step": 742000
},
{
"epoch": 1.1852946834145746,
"grad_norm": 0.00021578549058176577,
"learning_rate": 1.070280081407345e-05,
"loss": 0.0,
"step": 742500
},
{
"epoch": 1.1860928616525641,
"grad_norm": 2.1622965335845947,
"learning_rate": 1.0684778615803701e-05,
"loss": 0.0,
"step": 743000
},
{
"epoch": 1.186891039890554,
"grad_norm": 0.0002855357888620347,
"learning_rate": 1.0666763206286051e-05,
"loss": 0.0,
"step": 743500
},
{
"epoch": 1.1876892181285434,
"grad_norm": 0.00014283708878792822,
"learning_rate": 1.064875461386256e-05,
"loss": 0.0,
"step": 744000
},
{
"epoch": 1.188487396366533,
"grad_norm": 0.00024032694636844099,
"learning_rate": 1.0630752866864518e-05,
"loss": 0.0,
"step": 744500
},
{
"epoch": 1.1892855746045226,
"grad_norm": 0.00016632409824524075,
"learning_rate": 1.0612757993612478e-05,
"loss": 0.0,
"step": 745000
},
{
"epoch": 1.1900837528425123,
"grad_norm": 0.00023169444466475397,
"learning_rate": 1.059477002241616e-05,
"loss": 0.0,
"step": 745500
},
{
"epoch": 1.1908819310805019,
"grad_norm": 0.0002732094144448638,
"learning_rate": 1.0576788981574428e-05,
"loss": 0.0,
"step": 746000
},
{
"epoch": 1.1916801093184914,
"grad_norm": 0.0003594690060708672,
"learning_rate": 1.055881489937525e-05,
"loss": 0.0,
"step": 746500
},
{
"epoch": 1.192478287556481,
"grad_norm": 0.01778334006667137,
"learning_rate": 1.0540847804095639e-05,
"loss": 0.0,
"step": 747000
},
{
"epoch": 1.1932764657944708,
"grad_norm": 0.00024451143690384924,
"learning_rate": 1.0522887724001632e-05,
"loss": 0.0,
"step": 747500
},
{
"epoch": 1.1940746440324603,
"grad_norm": 0.0002816928317770362,
"learning_rate": 1.0504934687348198e-05,
"loss": 0.0,
"step": 748000
},
{
"epoch": 1.1948728222704499,
"grad_norm": 0.00029887750861234963,
"learning_rate": 1.048698872237927e-05,
"loss": 0.0,
"step": 748500
},
{
"epoch": 1.1956710005084394,
"grad_norm": 0.000365947576938197,
"learning_rate": 1.0469049857327611e-05,
"loss": 0.0,
"step": 749000
},
{
"epoch": 1.1964691787464292,
"grad_norm": 0.00027725560357794166,
"learning_rate": 1.0451118120414837e-05,
"loss": 0.0,
"step": 749500
},
{
"epoch": 1.1972673569844188,
"grad_norm": 0.00019809386867564172,
"learning_rate": 1.0433193539851356e-05,
"loss": 0.0,
"step": 750000
},
{
"epoch": 1.1972673569844188,
"eval_loss": 1.1492022167658433e-05,
"eval_runtime": 22209.8831,
"eval_samples_per_second": 100.284,
"eval_steps_per_second": 3.134,
"step": 750000
},
{
"epoch": 1.1980655352224083,
"grad_norm": 0.24246802926063538,
"learning_rate": 1.0415276143836297e-05,
"loss": 0.0,
"step": 750500
},
{
"epoch": 1.198863713460398,
"grad_norm": 0.00016463996144011617,
"learning_rate": 1.0397365960557508e-05,
"loss": 0.0,
"step": 751000
},
{
"epoch": 1.1996618916983877,
"grad_norm": 0.00020477671932894737,
"learning_rate": 1.0379463018191474e-05,
"loss": 0.0,
"step": 751500
},
{
"epoch": 1.2004600699363772,
"grad_norm": 0.0001754205150064081,
"learning_rate": 1.0361567344903292e-05,
"loss": 0.0,
"step": 752000
},
{
"epoch": 1.2012582481743668,
"grad_norm": 76.75057220458984,
"learning_rate": 1.0343678968846633e-05,
"loss": 0.0,
"step": 752500
},
{
"epoch": 1.2020564264123563,
"grad_norm": 0.00011788753909058869,
"learning_rate": 1.0325797918163671e-05,
"loss": 0.0,
"step": 753000
},
{
"epoch": 1.202854604650346,
"grad_norm": 0.00018122825713362545,
"learning_rate": 1.030792422098507e-05,
"loss": 0.0,
"step": 753500
},
{
"epoch": 1.2036527828883357,
"grad_norm": 0.00012735063501168042,
"learning_rate": 1.029005790542992e-05,
"loss": 0.0,
"step": 754000
},
{
"epoch": 1.2044509611263252,
"grad_norm": 0.0020100900437682867,
"learning_rate": 1.0272198999605701e-05,
"loss": 0.0,
"step": 754500
},
{
"epoch": 1.205249139364315,
"grad_norm": 0.004669174086302519,
"learning_rate": 1.025434753160823e-05,
"loss": 0.0,
"step": 755000
},
{
"epoch": 1.2060473176023045,
"grad_norm": 0.00020723527995869517,
"learning_rate": 1.0236503529521623e-05,
"loss": 0.0,
"step": 755500
},
{
"epoch": 1.206845495840294,
"grad_norm": 0.00024396587105002254,
"learning_rate": 1.021866702141826e-05,
"loss": 0.0,
"step": 756000
},
{
"epoch": 1.2076436740782837,
"grad_norm": 0.0006310238968580961,
"learning_rate": 1.0200838035358719e-05,
"loss": 0.0,
"step": 756500
},
{
"epoch": 1.2084418523162732,
"grad_norm": 0.00026260962476953864,
"learning_rate": 1.0183016599391756e-05,
"loss": 0.0,
"step": 757000
},
{
"epoch": 1.209240030554263,
"grad_norm": 0.0001703925954643637,
"learning_rate": 1.0165202741554238e-05,
"loss": 0.0,
"step": 757500
},
{
"epoch": 1.2100382087922525,
"grad_norm": 0.0003848731575999409,
"learning_rate": 1.014739648987112e-05,
"loss": 0.0,
"step": 758000
},
{
"epoch": 1.210836387030242,
"grad_norm": 0.00011386480764485896,
"learning_rate": 1.0129597872355384e-05,
"loss": 0.0,
"step": 758500
},
{
"epoch": 1.2116345652682319,
"grad_norm": 0.0001352078834315762,
"learning_rate": 1.0111806917008004e-05,
"loss": 0.0,
"step": 759000
},
{
"epoch": 1.2124327435062214,
"grad_norm": 7.882779755163938e-05,
"learning_rate": 1.00940236518179e-05,
"loss": 0.0,
"step": 759500
},
{
"epoch": 1.213230921744211,
"grad_norm": 0.0001364546042168513,
"learning_rate": 1.0076248104761892e-05,
"loss": 0.0,
"step": 760000
},
{
"epoch": 1.2140290999822005,
"grad_norm": 0.00026103860000148416,
"learning_rate": 1.0058480303804666e-05,
"loss": 0.0,
"step": 760500
},
{
"epoch": 1.2148272782201903,
"grad_norm": 0.00013641221448779106,
"learning_rate": 1.0040720276898708e-05,
"loss": 0.0,
"step": 761000
},
{
"epoch": 1.2156254564581799,
"grad_norm": 0.0008844132535159588,
"learning_rate": 1.0022968051984282e-05,
"loss": 0.0,
"step": 761500
},
{
"epoch": 1.2164236346961694,
"grad_norm": 0.00033063263981603086,
"learning_rate": 1.0005223656989379e-05,
"loss": 0.0,
"step": 762000
},
{
"epoch": 1.217221812934159,
"grad_norm": 0.00022558389173354954,
"learning_rate": 9.98748711982967e-06,
"loss": 0.0,
"step": 762500
},
{
"epoch": 1.2180199911721488,
"grad_norm": 0.0001900517090689391,
"learning_rate": 9.969758468408462e-06,
"loss": 0.0,
"step": 763000
},
{
"epoch": 1.2188181694101383,
"grad_norm": 0.00016768294153735042,
"learning_rate": 9.952037730616658e-06,
"loss": 0.0,
"step": 763500
},
{
"epoch": 1.2196163476481279,
"grad_norm": 0.0002104245504597202,
"learning_rate": 9.934324934332713e-06,
"loss": 0.0,
"step": 764000
},
{
"epoch": 1.2204145258861174,
"grad_norm": 0.00023655268887523562,
"learning_rate": 9.916620107422582e-06,
"loss": 0.0,
"step": 764500
},
{
"epoch": 1.2212127041241072,
"grad_norm": 0.0002007113944273442,
"learning_rate": 9.89892327773969e-06,
"loss": 0.0,
"step": 765000
},
{
"epoch": 1.2220108823620968,
"grad_norm": 9.15752124786377,
"learning_rate": 9.881234473124877e-06,
"loss": 0.0,
"step": 765500
},
{
"epoch": 1.2228090606000863,
"grad_norm": 0.0015039691934362054,
"learning_rate": 9.863553721406356e-06,
"loss": 0.0,
"step": 766000
},
{
"epoch": 1.2236072388380759,
"grad_norm": 0.00017231931269634515,
"learning_rate": 9.845881050399678e-06,
"loss": 0.0,
"step": 766500
},
{
"epoch": 1.2244054170760656,
"grad_norm": 0.00019999749201815575,
"learning_rate": 9.828216487907672e-06,
"loss": 0.0,
"step": 767000
},
{
"epoch": 1.2252035953140552,
"grad_norm": 0.0002266662777401507,
"learning_rate": 9.810560061720419e-06,
"loss": 0.0,
"step": 767500
},
{
"epoch": 1.2260017735520448,
"grad_norm": 0.08027360588312149,
"learning_rate": 9.792911799615198e-06,
"loss": 0.0,
"step": 768000
},
{
"epoch": 1.2267999517900345,
"grad_norm": 0.00035596557427197695,
"learning_rate": 9.775271729356429e-06,
"loss": 0.0,
"step": 768500
},
{
"epoch": 1.227598130028024,
"grad_norm": 0.0004455151502043009,
"learning_rate": 9.757639878695674e-06,
"loss": 0.0,
"step": 769000
},
{
"epoch": 1.2283963082660136,
"grad_norm": 0.00016969860007520765,
"learning_rate": 9.74001627537154e-06,
"loss": 0.0,
"step": 769500
},
{
"epoch": 1.2291944865040032,
"grad_norm": 0.0002785904798656702,
"learning_rate": 9.72240094710967e-06,
"loss": 0.0,
"step": 770000
},
{
"epoch": 1.2299926647419928,
"grad_norm": 0.00011213342804694548,
"learning_rate": 9.704793921622687e-06,
"loss": 0.0,
"step": 770500
},
{
"epoch": 1.2307908429799825,
"grad_norm": 0.00030834253993816674,
"learning_rate": 9.68719522661014e-06,
"loss": 0.0,
"step": 771000
},
{
"epoch": 1.231589021217972,
"grad_norm": 0.0002819538349285722,
"learning_rate": 9.6696048897585e-06,
"loss": 0.0,
"step": 771500
},
{
"epoch": 1.2323871994559616,
"grad_norm": 0.00018688519776333123,
"learning_rate": 9.652022938741049e-06,
"loss": 0.0,
"step": 772000
},
{
"epoch": 1.2331853776939514,
"grad_norm": 0.00018685254326555878,
"learning_rate": 9.634449401217926e-06,
"loss": 0.0,
"step": 772500
},
{
"epoch": 1.233983555931941,
"grad_norm": 0.00023071758914738894,
"learning_rate": 9.616884304835981e-06,
"loss": 0.0,
"step": 773000
},
{
"epoch": 1.2347817341699305,
"grad_norm": 0.0003963226336054504,
"learning_rate": 9.59932767722883e-06,
"loss": 0.0,
"step": 773500
},
{
"epoch": 1.23557991240792,
"grad_norm": 0.0005826257402077317,
"learning_rate": 9.581779546016741e-06,
"loss": 0.0,
"step": 774000
},
{
"epoch": 1.2363780906459099,
"grad_norm": 0.0002092513459501788,
"learning_rate": 9.56423993880661e-06,
"loss": 0.0,
"step": 774500
},
{
"epoch": 1.2371762688838994,
"grad_norm": 0.00026280272868461907,
"learning_rate": 9.546708883191948e-06,
"loss": 0.0,
"step": 775000
},
{
"epoch": 1.237974447121889,
"grad_norm": 0.0003001219010911882,
"learning_rate": 9.529186406752782e-06,
"loss": 0.0,
"step": 775500
},
{
"epoch": 1.2387726253598785,
"grad_norm": 0.002450470346957445,
"learning_rate": 9.511672537055676e-06,
"loss": 0.0,
"step": 776000
},
{
"epoch": 1.2395708035978683,
"grad_norm": 0.004587731324136257,
"learning_rate": 9.494167301653618e-06,
"loss": 0.0,
"step": 776500
},
{
"epoch": 1.2403689818358579,
"grad_norm": 0.00031009313534013927,
"learning_rate": 9.47667072808605e-06,
"loss": 0.0,
"step": 777000
},
{
"epoch": 1.2411671600738474,
"grad_norm": 0.5249637365341187,
"learning_rate": 9.459182843878752e-06,
"loss": 0.0,
"step": 777500
},
{
"epoch": 1.241965338311837,
"grad_norm": 0.00017867004498839378,
"learning_rate": 9.441703676543848e-06,
"loss": 0.0,
"step": 778000
},
{
"epoch": 1.2427635165498268,
"grad_norm": 0.0002787476987577975,
"learning_rate": 9.424233253579762e-06,
"loss": 0.0,
"step": 778500
},
{
"epoch": 1.2435616947878163,
"grad_norm": 0.00018374405044596642,
"learning_rate": 9.406771602471137e-06,
"loss": 0.0,
"step": 779000
},
{
"epoch": 1.2443598730258059,
"grad_norm": 0.0002696373558137566,
"learning_rate": 9.38931875068884e-06,
"loss": 0.0,
"step": 779500
},
{
"epoch": 1.2451580512637954,
"grad_norm": 0.00013673820649273694,
"learning_rate": 9.371874725689875e-06,
"loss": 0.0,
"step": 780000
},
{
"epoch": 1.2459562295017852,
"grad_norm": 0.0001669849589234218,
"learning_rate": 9.354439554917364e-06,
"loss": 0.0,
"step": 780500
},
{
"epoch": 1.2467544077397748,
"grad_norm": 0.00021636247402057052,
"learning_rate": 9.33701326580051e-06,
"loss": 0.0,
"step": 781000
},
{
"epoch": 1.2475525859777643,
"grad_norm": 0.00017172202933579683,
"learning_rate": 9.319595885754533e-06,
"loss": 0.0,
"step": 781500
},
{
"epoch": 1.248350764215754,
"grad_norm": 0.00012820841220673174,
"learning_rate": 9.302187442180641e-06,
"loss": 0.0,
"step": 782000
},
{
"epoch": 1.2491489424537436,
"grad_norm": 0.00019042339408770204,
"learning_rate": 9.28478796246598e-06,
"loss": 0.0,
"step": 782500
},
{
"epoch": 1.2499471206917332,
"grad_norm": 0.00022117479238659143,
"learning_rate": 9.267397473983602e-06,
"loss": 0.0,
"step": 783000
},
{
"epoch": 1.2507452989297227,
"grad_norm": 0.00012226690887473524,
"learning_rate": 9.250016004092404e-06,
"loss": 0.0,
"step": 783500
},
{
"epoch": 1.2515434771677123,
"grad_norm": 0.00023977088858373463,
"learning_rate": 9.232643580137095e-06,
"loss": 0.0,
"step": 784000
},
{
"epoch": 1.252341655405702,
"grad_norm": 0.00018954268307425082,
"learning_rate": 9.215280229448168e-06,
"loss": 0.0,
"step": 784500
},
{
"epoch": 1.2531398336436916,
"grad_norm": 0.0015390360495075583,
"learning_rate": 9.197925979341817e-06,
"loss": 0.0,
"step": 785000
},
{
"epoch": 1.2539380118816812,
"grad_norm": 0.0001842692872742191,
"learning_rate": 9.180580857119946e-06,
"loss": 0.0,
"step": 785500
},
{
"epoch": 1.254736190119671,
"grad_norm": 0.006291312165558338,
"learning_rate": 9.163244890070076e-06,
"loss": 0.0,
"step": 786000
},
{
"epoch": 1.2555343683576605,
"grad_norm": 0.00022410901146940887,
"learning_rate": 9.145918105465339e-06,
"loss": 0.0,
"step": 786500
},
{
"epoch": 1.25633254659565,
"grad_norm": 0.00015199794142972678,
"learning_rate": 9.128600530564417e-06,
"loss": 0.0,
"step": 787000
},
{
"epoch": 1.2571307248336396,
"grad_norm": 0.00016999320359900594,
"learning_rate": 9.1112921926115e-06,
"loss": 0.0,
"step": 787500
},
{
"epoch": 1.2579289030716292,
"grad_norm": 0.00017655811097938567,
"learning_rate": 9.09399311883625e-06,
"loss": 0.0,
"step": 788000
},
{
"epoch": 1.258727081309619,
"grad_norm": 0.00023002490343060344,
"learning_rate": 9.07670333645375e-06,
"loss": 0.0,
"step": 788500
},
{
"epoch": 1.2595252595476085,
"grad_norm": 0.00012944928312208503,
"learning_rate": 9.059422872664476e-06,
"loss": 0.0,
"step": 789000
},
{
"epoch": 1.260323437785598,
"grad_norm": 14.440518379211426,
"learning_rate": 9.042151754654239e-06,
"loss": 0.0,
"step": 789500
},
{
"epoch": 1.2611216160235879,
"grad_norm": 0.00011326325329719111,
"learning_rate": 9.024890009594134e-06,
"loss": 0.0,
"step": 790000
},
{
"epoch": 1.2619197942615774,
"grad_norm": 0.0001579568488523364,
"learning_rate": 9.00763766464053e-06,
"loss": 0.0,
"step": 790500
},
{
"epoch": 1.262717972499567,
"grad_norm": 5.766981601715088,
"learning_rate": 8.990394746935e-06,
"loss": 0.0,
"step": 791000
},
{
"epoch": 1.2635161507375565,
"grad_norm": 0.00011029910092474893,
"learning_rate": 8.97316128360428e-06,
"loss": 0.0,
"step": 791500
},
{
"epoch": 1.264314328975546,
"grad_norm": 0.00014391505101229995,
"learning_rate": 8.955937301760239e-06,
"loss": 0.0,
"step": 792000
},
{
"epoch": 1.2651125072135359,
"grad_norm": 0.0001435024314559996,
"learning_rate": 8.938722828499834e-06,
"loss": 0.0,
"step": 792500
},
{
"epoch": 1.2659106854515254,
"grad_norm": 0.00015525566413998604,
"learning_rate": 8.921517890905052e-06,
"loss": 0.0,
"step": 793000
},
{
"epoch": 1.266708863689515,
"grad_norm": 0.00010429436224512756,
"learning_rate": 8.90432251604288e-06,
"loss": 0.0,
"step": 793500
},
{
"epoch": 1.2675070419275047,
"grad_norm": 0.00012090901145711541,
"learning_rate": 8.887136730965275e-06,
"loss": 0.0,
"step": 794000
},
{
"epoch": 1.2683052201654943,
"grad_norm": 0.00014295458095148206,
"learning_rate": 8.869960562709083e-06,
"loss": 0.0,
"step": 794500
},
{
"epoch": 1.2691033984034839,
"grad_norm": 0.00015002151485532522,
"learning_rate": 8.852794038296048e-06,
"loss": 0.0,
"step": 795000
},
{
"epoch": 1.2699015766414736,
"grad_norm": 0.00012087346840417013,
"learning_rate": 8.835637184732717e-06,
"loss": 0.0,
"step": 795500
},
{
"epoch": 1.2706997548794632,
"grad_norm": 0.00017985192243941128,
"learning_rate": 8.818490029010444e-06,
"loss": 0.0,
"step": 796000
},
{
"epoch": 1.2714979331174527,
"grad_norm": 8.896778308553621e-05,
"learning_rate": 8.80135259810531e-06,
"loss": 0.0,
"step": 796500
},
{
"epoch": 1.2722961113554423,
"grad_norm": 0.00011164528405060992,
"learning_rate": 8.784224918978105e-06,
"loss": 0.0,
"step": 797000
},
{
"epoch": 1.2730942895934318,
"grad_norm": 533.9027709960938,
"learning_rate": 8.767107018574276e-06,
"loss": 0.0,
"step": 797500
},
{
"epoch": 1.2738924678314216,
"grad_norm": 0.00014446699060499668,
"learning_rate": 8.749998923823887e-06,
"loss": 0.0,
"step": 798000
},
{
"epoch": 1.2746906460694112,
"grad_norm": 0.04612082615494728,
"learning_rate": 8.732900661641568e-06,
"loss": 0.0,
"step": 798500
},
{
"epoch": 1.2754888243074007,
"grad_norm": 0.00155142811127007,
"learning_rate": 8.715812258926501e-06,
"loss": 0.0,
"step": 799000
},
{
"epoch": 1.2762870025453905,
"grad_norm": 0.0004437122552189976,
"learning_rate": 8.698733742562327e-06,
"loss": 0.0,
"step": 799500
},
{
"epoch": 1.27708518078338,
"grad_norm": 0.00015456078108400106,
"learning_rate": 8.681665139417154e-06,
"loss": 0.0,
"step": 800000
},
{
"epoch": 1.2778833590213696,
"grad_norm": 0.00022585850092582405,
"learning_rate": 8.66460647634349e-06,
"loss": 0.0,
"step": 800500
},
{
"epoch": 1.2786815372593592,
"grad_norm": 0.0011763167567551136,
"learning_rate": 8.647557780178216e-06,
"loss": 0.0,
"step": 801000
},
{
"epoch": 1.2794797154973487,
"grad_norm": 0.00016555427282582968,
"learning_rate": 8.630519077742505e-06,
"loss": 0.0,
"step": 801500
},
{
"epoch": 1.2802778937353385,
"grad_norm": 0.11920250207185745,
"learning_rate": 8.613490395841833e-06,
"loss": 0.0,
"step": 802000
},
{
"epoch": 1.281076071973328,
"grad_norm": 0.00025985552929341793,
"learning_rate": 8.596471761265905e-06,
"loss": 0.0,
"step": 802500
},
{
"epoch": 1.2818742502113176,
"grad_norm": 0.00033153867116197944,
"learning_rate": 8.57946320078861e-06,
"loss": 0.0,
"step": 803000
},
{
"epoch": 1.2826724284493074,
"grad_norm": 0.0003908054204657674,
"learning_rate": 8.562464741168003e-06,
"loss": 0.0,
"step": 803500
},
{
"epoch": 1.283470606687297,
"grad_norm": 0.00017760110495146364,
"learning_rate": 8.545476409146235e-06,
"loss": 0.0,
"step": 804000
},
{
"epoch": 1.2842687849252865,
"grad_norm": 0.00015641027130186558,
"learning_rate": 8.528498231449543e-06,
"loss": 0.0,
"step": 804500
},
{
"epoch": 1.285066963163276,
"grad_norm": 0.00013975071487948298,
"learning_rate": 8.511530234788162e-06,
"loss": 0.0,
"step": 805000
},
{
"epoch": 1.2858651414012656,
"grad_norm": 0.0007206627633422613,
"learning_rate": 8.494572445856327e-06,
"loss": 0.0,
"step": 805500
},
{
"epoch": 1.2866633196392554,
"grad_norm": 0.000786223856266588,
"learning_rate": 8.477624891332226e-06,
"loss": 0.0,
"step": 806000
},
{
"epoch": 1.287461497877245,
"grad_norm": 0.00019913198775611818,
"learning_rate": 8.460687597877912e-06,
"loss": 0.0,
"step": 806500
},
{
"epoch": 1.2882596761152345,
"grad_norm": 0.00020240710000507534,
"learning_rate": 8.443760592139325e-06,
"loss": 0.0,
"step": 807000
},
{
"epoch": 1.2890578543532243,
"grad_norm": 0.0007126539712771773,
"learning_rate": 8.426843900746208e-06,
"loss": 0.0,
"step": 807500
},
{
"epoch": 1.2898560325912138,
"grad_norm": 0.00020748093083966523,
"learning_rate": 8.409937550312087e-06,
"loss": 0.0,
"step": 808000
},
{
"epoch": 1.2906542108292034,
"grad_norm": 0.00014328854740597308,
"learning_rate": 8.3930415674342e-06,
"loss": 0.0,
"step": 808500
},
{
"epoch": 1.2914523890671932,
"grad_norm": 0.0001533345493953675,
"learning_rate": 8.376155978693492e-06,
"loss": 0.0,
"step": 809000
},
{
"epoch": 1.2922505673051827,
"grad_norm": 0.00023107643937692046,
"learning_rate": 8.359280810654558e-06,
"loss": 0.0,
"step": 809500
},
{
"epoch": 1.2930487455431723,
"grad_norm": 0.00015860753774177283,
"learning_rate": 8.342416089865576e-06,
"loss": 0.0,
"step": 810000
},
{
"epoch": 1.2938469237811618,
"grad_norm": 0.00012862969015259296,
"learning_rate": 8.325561842858315e-06,
"loss": 0.0,
"step": 810500
},
{
"epoch": 1.2946451020191514,
"grad_norm": 0.0001139972810051404,
"learning_rate": 8.308718096148053e-06,
"loss": 0.0,
"step": 811000
},
{
"epoch": 1.2954432802571412,
"grad_norm": 0.00019040738698095083,
"learning_rate": 8.291884876233555e-06,
"loss": 0.0,
"step": 811500
},
{
"epoch": 1.2962414584951307,
"grad_norm": 0.0001693676895229146,
"learning_rate": 8.275062209597011e-06,
"loss": 0.0,
"step": 812000
},
{
"epoch": 1.2970396367331203,
"grad_norm": 0.0026823594234883785,
"learning_rate": 8.258250122704027e-06,
"loss": 0.0,
"step": 812500
},
{
"epoch": 1.29783781497111,
"grad_norm": 0.0002031605108641088,
"learning_rate": 8.241448642003559e-06,
"loss": 0.0,
"step": 813000
},
{
"epoch": 1.2986359932090996,
"grad_norm": 0.0022485863883048296,
"learning_rate": 8.224657793927868e-06,
"loss": 0.0,
"step": 813500
},
{
"epoch": 1.2994341714470892,
"grad_norm": 0.00010316159023204818,
"learning_rate": 8.207877604892493e-06,
"loss": 0.0,
"step": 814000
},
{
"epoch": 1.3002323496850787,
"grad_norm": 0.00014087182353250682,
"learning_rate": 8.191108101296213e-06,
"loss": 0.0,
"step": 814500
},
{
"epoch": 1.3010305279230683,
"grad_norm": 0.00015167437959462404,
"learning_rate": 8.17434930952099e-06,
"loss": 0.0,
"step": 815000
},
{
"epoch": 1.301828706161058,
"grad_norm": 0.00015953517868183553,
"learning_rate": 8.157601255931927e-06,
"loss": 0.0,
"step": 815500
},
{
"epoch": 1.3026268843990476,
"grad_norm": 0.00014891130558680743,
"learning_rate": 8.140863966877238e-06,
"loss": 0.0,
"step": 816000
},
{
"epoch": 1.3034250626370372,
"grad_norm": 0.00016368075739592314,
"learning_rate": 8.124137468688216e-06,
"loss": 0.0,
"step": 816500
},
{
"epoch": 1.304223240875027,
"grad_norm": 0.00012912409147247672,
"learning_rate": 8.10742178767915e-06,
"loss": 0.0,
"step": 817000
},
{
"epoch": 1.3050214191130165,
"grad_norm": 0.04891032353043556,
"learning_rate": 8.090716950147336e-06,
"loss": 0.0,
"step": 817500
},
{
"epoch": 1.305819597351006,
"grad_norm": 0.000139459443744272,
"learning_rate": 8.074022982373006e-06,
"loss": 0.0,
"step": 818000
},
{
"epoch": 1.3066177755889956,
"grad_norm": 0.00022227551380638033,
"learning_rate": 8.057339910619277e-06,
"loss": 0.0,
"step": 818500
},
{
"epoch": 1.3074159538269852,
"grad_norm": 0.0002710748231038451,
"learning_rate": 8.040667761132143e-06,
"loss": 0.0,
"step": 819000
},
{
"epoch": 1.308214132064975,
"grad_norm": 9.039805445354432e-05,
"learning_rate": 8.024006560140392e-06,
"loss": 0.0,
"step": 819500
},
{
"epoch": 1.3090123103029645,
"grad_norm": 0.009568951092660427,
"learning_rate": 8.007356333855626e-06,
"loss": 0.0,
"step": 820000
},
{
"epoch": 1.309810488540954,
"grad_norm": 0.00014985322195570916,
"learning_rate": 7.990717108472138e-06,
"loss": 0.0,
"step": 820500
},
{
"epoch": 1.3106086667789438,
"grad_norm": 0.00019329690258018672,
"learning_rate": 7.974088910166944e-06,
"loss": 0.0,
"step": 821000
},
{
"epoch": 1.3114068450169334,
"grad_norm": 0.00029314137645997107,
"learning_rate": 7.957471765099701e-06,
"loss": 0.0,
"step": 821500
},
{
"epoch": 1.312205023254923,
"grad_norm": 0.00020969565957784653,
"learning_rate": 7.940865699412673e-06,
"loss": 0.0,
"step": 822000
},
{
"epoch": 1.3130032014929125,
"grad_norm": 0.00025969123817048967,
"learning_rate": 7.92427073923071e-06,
"loss": 0.0,
"step": 822500
},
{
"epoch": 1.313801379730902,
"grad_norm": 0.000144842459121719,
"learning_rate": 7.907686910661158e-06,
"loss": 0.0,
"step": 823000
},
{
"epoch": 1.3145995579688918,
"grad_norm": 0.00017976704111788422,
"learning_rate": 7.8911142397939e-06,
"loss": 0.0,
"step": 823500
},
{
"epoch": 1.3153977362068814,
"grad_norm": 0.0001856670278357342,
"learning_rate": 7.874552752701218e-06,
"loss": 0.0,
"step": 824000
},
{
"epoch": 1.316195914444871,
"grad_norm": 0.0002021729596890509,
"learning_rate": 7.858002475437825e-06,
"loss": 0.0,
"step": 824500
},
{
"epoch": 1.3169940926828607,
"grad_norm": 0.00013654265785589814,
"learning_rate": 7.8414634340408e-06,
"loss": 0.0,
"step": 825000
},
{
"epoch": 1.3177922709208503,
"grad_norm": 0.00013490175479091704,
"learning_rate": 7.824935654529525e-06,
"loss": 0.0,
"step": 825500
},
{
"epoch": 1.3185904491588398,
"grad_norm": 9.905237675411627e-05,
"learning_rate": 7.808419162905695e-06,
"loss": 0.0,
"step": 826000
},
{
"epoch": 1.3193886273968296,
"grad_norm": 0.00026544061256572604,
"learning_rate": 7.791913985153204e-06,
"loss": 0.0,
"step": 826500
},
{
"epoch": 1.3201868056348192,
"grad_norm": 0.0002780807844828814,
"learning_rate": 7.775420147238204e-06,
"loss": 0.0,
"step": 827000
},
{
"epoch": 1.3209849838728087,
"grad_norm": 0.00015520601300522685,
"learning_rate": 7.75893767510896e-06,
"loss": 0.0,
"step": 827500
},
{
"epoch": 1.3217831621107983,
"grad_norm": 0.00017638910503592342,
"learning_rate": 7.74246659469587e-06,
"loss": 0.0,
"step": 828000
},
{
"epoch": 1.3225813403487878,
"grad_norm": 0.0001925562391988933,
"learning_rate": 7.726006931911415e-06,
"loss": 0.0,
"step": 828500
},
{
"epoch": 1.3233795185867776,
"grad_norm": 0.00015528348740190268,
"learning_rate": 7.709558712650111e-06,
"loss": 0.0,
"step": 829000
},
{
"epoch": 1.3241776968247672,
"grad_norm": 0.004120807629078627,
"learning_rate": 7.693121962788482e-06,
"loss": 0.0,
"step": 829500
},
{
"epoch": 1.3249758750627567,
"grad_norm": 0.00016603163385298103,
"learning_rate": 7.676696708184975e-06,
"loss": 0.0,
"step": 830000
},
{
"epoch": 1.3257740533007465,
"grad_norm": 0.00022943579824641347,
"learning_rate": 7.66028297468e-06,
"loss": 0.0,
"step": 830500
},
{
"epoch": 1.326572231538736,
"grad_norm": 0.005338750313967466,
"learning_rate": 7.643880788095805e-06,
"loss": 0.0,
"step": 831000
},
{
"epoch": 1.3273704097767256,
"grad_norm": 0.00014208181528374553,
"learning_rate": 7.62749017423648e-06,
"loss": 0.0,
"step": 831500
},
{
"epoch": 1.3281685880147152,
"grad_norm": 0.00011977060785284266,
"learning_rate": 7.611111158887916e-06,
"loss": 0.0,
"step": 832000
},
{
"epoch": 1.3289667662527047,
"grad_norm": 0.00016354784020222723,
"learning_rate": 7.594743767817755e-06,
"loss": 0.0,
"step": 832500
},
{
"epoch": 1.3297649444906945,
"grad_norm": 0.00036256128805689514,
"learning_rate": 7.578388026775356e-06,
"loss": 0.0,
"step": 833000
},
{
"epoch": 1.330563122728684,
"grad_norm": 9.693684114608914e-05,
"learning_rate": 7.5620439614917334e-06,
"loss": 0.0,
"step": 833500
},
{
"epoch": 1.3313613009666736,
"grad_norm": 0.00012745718413498253,
"learning_rate": 7.54571159767955e-06,
"loss": 0.0,
"step": 834000
},
{
"epoch": 1.3321594792046634,
"grad_norm": 0.00015118411101866513,
"learning_rate": 7.52939096103306e-06,
"loss": 0.0,
"step": 834500
},
{
"epoch": 1.332957657442653,
"grad_norm": 0.0001406726660206914,
"learning_rate": 7.5130820772280494e-06,
"loss": 0.0,
"step": 835000
},
{
"epoch": 1.3337558356806425,
"grad_norm": 0.00023740965116303414,
"learning_rate": 7.496784971921836e-06,
"loss": 0.0,
"step": 835500
},
{
"epoch": 1.334554013918632,
"grad_norm": 0.00013875133299734443,
"learning_rate": 7.4804996707531974e-06,
"loss": 0.0,
"step": 836000
},
{
"epoch": 1.3353521921566216,
"grad_norm": 0.0002137289848178625,
"learning_rate": 7.464226199342347e-06,
"loss": 0.0,
"step": 836500
},
{
"epoch": 1.3361503703946114,
"grad_norm": 0.00043378453119657934,
"learning_rate": 7.4479645832908724e-06,
"loss": 0.0,
"step": 837000
},
{
"epoch": 1.336948548632601,
"grad_norm": 0.00016079274064395577,
"learning_rate": 7.431714848181727e-06,
"loss": 0.0,
"step": 837500
},
{
"epoch": 1.3377467268705905,
"grad_norm": 0.0005202249740250409,
"learning_rate": 7.415477019579172e-06,
"loss": 0.0,
"step": 838000
},
{
"epoch": 1.3385449051085803,
"grad_norm": 0.000191315877600573,
"learning_rate": 7.39925112302872e-06,
"loss": 0.0,
"step": 838500
},
{
"epoch": 1.3393430833465698,
"grad_norm": 0.23445342481136322,
"learning_rate": 7.383037184057128e-06,
"loss": 0.0,
"step": 839000
},
{
"epoch": 1.3401412615845594,
"grad_norm": 0.01140950620174408,
"learning_rate": 7.36683522817234e-06,
"loss": 0.0,
"step": 839500
},
{
"epoch": 1.3409394398225492,
"grad_norm": 0.00017085122817661613,
"learning_rate": 7.35064528086345e-06,
"loss": 0.0,
"step": 840000
},
{
"epoch": 1.3417376180605387,
"grad_norm": 0.0003418387204874307,
"learning_rate": 7.334467367600643e-06,
"loss": 0.0,
"step": 840500
},
{
"epoch": 1.3425357962985283,
"grad_norm": 0.0002189553779317066,
"learning_rate": 7.318301513835188e-06,
"loss": 0.0,
"step": 841000
},
{
"epoch": 1.3433339745365178,
"grad_norm": 0.00013727162149734795,
"learning_rate": 7.3021477449993866e-06,
"loss": 0.0,
"step": 841500
},
{
"epoch": 1.3441321527745074,
"grad_norm": 0.00011598570563364774,
"learning_rate": 7.2860060865065075e-06,
"loss": 0.0,
"step": 842000
},
{
"epoch": 1.3449303310124971,
"grad_norm": 0.0001249085908057168,
"learning_rate": 7.269876563750783e-06,
"loss": 0.0,
"step": 842500
},
{
"epoch": 1.3457285092504867,
"grad_norm": 350.6066589355469,
"learning_rate": 7.253759202107352e-06,
"loss": 0.0,
"step": 843000
},
{
"epoch": 1.3465266874884763,
"grad_norm": 0.00022806675406172872,
"learning_rate": 7.237654026932224e-06,
"loss": 0.0,
"step": 843500
},
{
"epoch": 1.347324865726466,
"grad_norm": 0.0001397529267705977,
"learning_rate": 7.221561063562219e-06,
"loss": 0.0,
"step": 844000
},
{
"epoch": 1.3481230439644556,
"grad_norm": 0.0014991023344919086,
"learning_rate": 7.205480337314964e-06,
"loss": 0.0,
"step": 844500
},
{
"epoch": 1.3489212222024451,
"grad_norm": 0.0007024999940767884,
"learning_rate": 7.189411873488836e-06,
"loss": 0.0,
"step": 845000
},
{
"epoch": 1.3497194004404347,
"grad_norm": 0.00010580118396319449,
"learning_rate": 7.173355697362898e-06,
"loss": 0.0,
"step": 845500
},
{
"epoch": 1.3505175786784243,
"grad_norm": 0.0036384917329996824,
"learning_rate": 7.157311834196908e-06,
"loss": 0.0,
"step": 846000
},
{
"epoch": 1.351315756916414,
"grad_norm": 0.0009470462100580335,
"learning_rate": 7.141280309231241e-06,
"loss": 0.0,
"step": 846500
},
{
"epoch": 1.3521139351544036,
"grad_norm": 0.00014973332872614264,
"learning_rate": 7.125261147686855e-06,
"loss": 0.0,
"step": 847000
},
{
"epoch": 1.3529121133923931,
"grad_norm": 0.0022423311602324247,
"learning_rate": 7.10925437476527e-06,
"loss": 0.0,
"step": 847500
},
{
"epoch": 1.353710291630383,
"grad_norm": 0.0001411356934113428,
"learning_rate": 7.093260015648512e-06,
"loss": 0.0,
"step": 848000
},
{
"epoch": 1.3545084698683725,
"grad_norm": 0.0017032199539244175,
"learning_rate": 7.077278095499081e-06,
"loss": 0.0,
"step": 848500
},
{
"epoch": 1.355306648106362,
"grad_norm": 0.00015193774015642703,
"learning_rate": 7.061308639459893e-06,
"loss": 0.0,
"step": 849000
},
{
"epoch": 1.3561048263443516,
"grad_norm": 2119.16162109375,
"learning_rate": 7.04535167265427e-06,
"loss": 0.0,
"step": 849500
},
{
"epoch": 1.3569030045823411,
"grad_norm": 0.00017610577924642712,
"learning_rate": 7.0294072201858885e-06,
"loss": 0.0,
"step": 850000
},
{
"epoch": 1.357701182820331,
"grad_norm": 0.000205761069082655,
"learning_rate": 7.01347530713872e-06,
"loss": 0.0,
"step": 850500
},
{
"epoch": 1.3584993610583205,
"grad_norm": 0.0001279138377867639,
"learning_rate": 6.9975559585770245e-06,
"loss": 0.0,
"step": 851000
},
{
"epoch": 1.35929753929631,
"grad_norm": 0.00014980623382143676,
"learning_rate": 6.981649199545289e-06,
"loss": 0.0,
"step": 851500
},
{
"epoch": 1.3600957175342998,
"grad_norm": 0.0001443100773030892,
"learning_rate": 6.9657550550682035e-06,
"loss": 0.0,
"step": 852000
},
{
"epoch": 1.3608938957722894,
"grad_norm": 0.002811576472595334,
"learning_rate": 6.949873550150591e-06,
"loss": 0.0,
"step": 852500
},
{
"epoch": 1.361692074010279,
"grad_norm": 0.00020981239504180849,
"learning_rate": 6.93400470977741e-06,
"loss": 0.0,
"step": 853000
},
{
"epoch": 1.3624902522482685,
"grad_norm": 0.2709546983242035,
"learning_rate": 6.918148558913697e-06,
"loss": 0.0,
"step": 853500
},
{
"epoch": 1.363288430486258,
"grad_norm": 0.0001471164432587102,
"learning_rate": 6.902305122504502e-06,
"loss": 0.0,
"step": 854000
},
{
"epoch": 1.3640866087242478,
"grad_norm": 0.00018949166405946016,
"learning_rate": 6.886474425474902e-06,
"loss": 0.0,
"step": 854500
},
{
"epoch": 1.3648847869622374,
"grad_norm": 0.00011930407345062122,
"learning_rate": 6.870656492729898e-06,
"loss": 0.0,
"step": 855000
},
{
"epoch": 1.365682965200227,
"grad_norm": 9.36063879635185e-05,
"learning_rate": 6.854851349154454e-06,
"loss": 0.0,
"step": 855500
},
{
"epoch": 1.3664811434382167,
"grad_norm": 0.24007728695869446,
"learning_rate": 6.839059019613378e-06,
"loss": 0.0,
"step": 856000
},
{
"epoch": 1.3672793216762062,
"grad_norm": 0.00011204006295884028,
"learning_rate": 6.82327952895132e-06,
"loss": 0.0,
"step": 856500
},
{
"epoch": 1.3680774999141958,
"grad_norm": 7.074438326526433e-05,
"learning_rate": 6.807512901992764e-06,
"loss": 0.0,
"step": 857000
},
{
"epoch": 1.3688756781521856,
"grad_norm": 0.000122178447782062,
"learning_rate": 6.791759163541918e-06,
"loss": 0.0,
"step": 857500
},
{
"epoch": 1.3696738563901751,
"grad_norm": 0.0001083787574316375,
"learning_rate": 6.776018338382742e-06,
"loss": 0.0,
"step": 858000
},
{
"epoch": 1.3704720346281647,
"grad_norm": 0.00013601896353065968,
"learning_rate": 6.760290451278853e-06,
"loss": 0.0,
"step": 858500
},
{
"epoch": 1.3712702128661542,
"grad_norm": 0.001532147522084415,
"learning_rate": 6.744575526973552e-06,
"loss": 0.0,
"step": 859000
},
{
"epoch": 1.3720683911041438,
"grad_norm": 0.00012492662062868476,
"learning_rate": 6.728873590189714e-06,
"loss": 0.0,
"step": 859500
},
{
"epoch": 1.3728665693421336,
"grad_norm": 0.00010753708920674399,
"learning_rate": 6.713184665629786e-06,
"loss": 0.0,
"step": 860000
},
{
"epoch": 1.3736647475801231,
"grad_norm": 0.0001573436165926978,
"learning_rate": 6.69750877797576e-06,
"loss": 0.0,
"step": 860500
},
{
"epoch": 1.3744629258181127,
"grad_norm": 9.552572009852156e-05,
"learning_rate": 6.681845951889103e-06,
"loss": 0.0,
"step": 861000
},
{
"epoch": 1.3752611040561025,
"grad_norm": 0.00018931551312562078,
"learning_rate": 6.66619621201075e-06,
"loss": 0.0,
"step": 861500
},
{
"epoch": 1.376059282294092,
"grad_norm": 0.0004058021877426654,
"learning_rate": 6.650559582961019e-06,
"loss": 0.0,
"step": 862000
},
{
"epoch": 1.3768574605320816,
"grad_norm": 0.0001997623621718958,
"learning_rate": 6.634936089339643e-06,
"loss": 0.0,
"step": 862500
},
{
"epoch": 1.3776556387700711,
"grad_norm": 0.0004893583245575428,
"learning_rate": 6.619325755725658e-06,
"loss": 0.0,
"step": 863000
},
{
"epoch": 1.3784538170080607,
"grad_norm": 0.00013030781701672822,
"learning_rate": 6.603728606677401e-06,
"loss": 0.0,
"step": 863500
},
{
"epoch": 1.3792519952460505,
"grad_norm": 0.00012525140482466668,
"learning_rate": 6.588144666732477e-06,
"loss": 0.0,
"step": 864000
},
{
"epoch": 1.38005017348404,
"grad_norm": 0.00019924509979318827,
"learning_rate": 6.572573960407707e-06,
"loss": 0.0,
"step": 864500
},
{
"epoch": 1.3808483517220296,
"grad_norm": 0.0001296091650146991,
"learning_rate": 6.557016512199096e-06,
"loss": 0.0,
"step": 865000
},
{
"epoch": 1.3816465299600194,
"grad_norm": 0.00019620211969595402,
"learning_rate": 6.541472346581777e-06,
"loss": 0.0,
"step": 865500
},
{
"epoch": 1.382444708198009,
"grad_norm": 0.00017245823983103037,
"learning_rate": 6.525941488010001e-06,
"loss": 0.0,
"step": 866000
},
{
"epoch": 1.3832428864359985,
"grad_norm": 0.00019742832228075713,
"learning_rate": 6.510423960917086e-06,
"loss": 0.0,
"step": 866500
},
{
"epoch": 1.384041064673988,
"grad_norm": 0.0001866811653599143,
"learning_rate": 6.494919789715358e-06,
"loss": 0.0,
"step": 867000
},
{
"epoch": 1.3848392429119776,
"grad_norm": 0.003743622684851289,
"learning_rate": 6.479428998796151e-06,
"loss": 0.0,
"step": 867500
},
{
"epoch": 1.3856374211499674,
"grad_norm": 0.00019897932361345738,
"learning_rate": 6.463951612529742e-06,
"loss": 0.0,
"step": 868000
},
{
"epoch": 1.386435599387957,
"grad_norm": 0.00018085265764966607,
"learning_rate": 6.448487655265323e-06,
"loss": 0.0,
"step": 868500
},
{
"epoch": 1.3872337776259465,
"grad_norm": 0.00022326891485136002,
"learning_rate": 6.433037151330946e-06,
"loss": 0.0,
"step": 869000
},
{
"epoch": 1.3880319558639362,
"grad_norm": 0.0009524719207547605,
"learning_rate": 6.417600125033513e-06,
"loss": 0.0,
"step": 869500
},
{
"epoch": 1.3888301341019258,
"grad_norm": 0.00014634850958827883,
"learning_rate": 6.402176600658723e-06,
"loss": 0.0,
"step": 870000
},
{
"epoch": 1.3896283123399154,
"grad_norm": 0.00020068578305654228,
"learning_rate": 6.386766602471019e-06,
"loss": 0.0,
"step": 870500
},
{
"epoch": 1.3904264905779051,
"grad_norm": 0.00010230097541352734,
"learning_rate": 6.371370154713577e-06,
"loss": 0.0,
"step": 871000
},
{
"epoch": 1.3912246688158947,
"grad_norm": 0.00012358248932287097,
"learning_rate": 6.355987281608255e-06,
"loss": 0.0,
"step": 871500
},
{
"epoch": 1.3920228470538842,
"grad_norm": 0.00010632740304572508,
"learning_rate": 6.340618007355554e-06,
"loss": 0.0,
"step": 872000
},
{
"epoch": 1.3928210252918738,
"grad_norm": 9.845475142356008e-05,
"learning_rate": 6.325262356134572e-06,
"loss": 0.0,
"step": 872500
},
{
"epoch": 1.3936192035298633,
"grad_norm": 156.6703338623047,
"learning_rate": 6.309920352102985e-06,
"loss": 0.0,
"step": 873000
},
{
"epoch": 1.3944173817678531,
"grad_norm": 0.0001712299999780953,
"learning_rate": 6.294592019397005e-06,
"loss": 0.0,
"step": 873500
},
{
"epoch": 1.3952155600058427,
"grad_norm": 0.00036461843410506845,
"learning_rate": 6.279277382131317e-06,
"loss": 0.0,
"step": 874000
},
{
"epoch": 1.3960137382438322,
"grad_norm": 0.0001327757054241374,
"learning_rate": 6.2639764643990735e-06,
"loss": 0.0,
"step": 874500
},
{
"epoch": 1.396811916481822,
"grad_norm": 0.0001479165512137115,
"learning_rate": 6.248689290271848e-06,
"loss": 0.0,
"step": 875000
},
{
"epoch": 1.3976100947198116,
"grad_norm": 8.516235538991168e-05,
"learning_rate": 6.233415883799577e-06,
"loss": 0.0,
"step": 875500
},
{
"epoch": 1.3984082729578011,
"grad_norm": 0.00011310012632748112,
"learning_rate": 6.218156269010544e-06,
"loss": 0.0,
"step": 876000
},
{
"epoch": 1.3992064511957907,
"grad_norm": 0.00013644126011058688,
"learning_rate": 6.202910469911346e-06,
"loss": 0.0,
"step": 876500
},
{
"epoch": 1.4000046294337802,
"grad_norm": 0.0001388086675433442,
"learning_rate": 6.187678510486834e-06,
"loss": 0.0,
"step": 877000
},
{
"epoch": 1.40080280767177,
"grad_norm": 0.00012577083543874323,
"learning_rate": 6.172460414700082e-06,
"loss": 0.0,
"step": 877500
},
{
"epoch": 1.4016009859097596,
"grad_norm": 0.0001132668912759982,
"learning_rate": 6.157256206492363e-06,
"loss": 0.0,
"step": 878000
},
{
"epoch": 1.4023991641477491,
"grad_norm": 0.00019735400564968586,
"learning_rate": 6.1420659097831064e-06,
"loss": 0.0,
"step": 878500
},
{
"epoch": 1.403197342385739,
"grad_norm": 0.00015007038018666208,
"learning_rate": 6.126889548469834e-06,
"loss": 0.0,
"step": 879000
},
{
"epoch": 1.4039955206237285,
"grad_norm": 0.00014871565508656204,
"learning_rate": 6.111727146428168e-06,
"loss": 0.0,
"step": 879500
},
{
"epoch": 1.404793698861718,
"grad_norm": 0.0001545920968055725,
"learning_rate": 6.096578727511758e-06,
"loss": 0.0,
"step": 880000
},
{
"epoch": 1.4055918770997076,
"grad_norm": 0.009757994674146175,
"learning_rate": 6.081444315552264e-06,
"loss": 0.0,
"step": 880500
},
{
"epoch": 1.4063900553376971,
"grad_norm": 0.00010370996460551396,
"learning_rate": 6.066323934359293e-06,
"loss": 0.0,
"step": 881000
},
{
"epoch": 1.407188233575687,
"grad_norm": 0.0002125945466104895,
"learning_rate": 6.051217607720393e-06,
"loss": 0.0,
"step": 881500
},
{
"epoch": 1.4079864118136765,
"grad_norm": 0.0001540460652904585,
"learning_rate": 6.036125359401005e-06,
"loss": 0.0,
"step": 882000
},
{
"epoch": 1.408784590051666,
"grad_norm": 0.016482815146446228,
"learning_rate": 6.0210472131444e-06,
"loss": 0.0,
"step": 882500
},
{
"epoch": 1.4095827682896558,
"grad_norm": 0.0001845559454523027,
"learning_rate": 6.005983192671689e-06,
"loss": 0.0,
"step": 883000
},
{
"epoch": 1.4103809465276453,
"grad_norm": 0.00020905568089801818,
"learning_rate": 5.990933321681743e-06,
"loss": 0.0,
"step": 883500
},
{
"epoch": 1.411179124765635,
"grad_norm": 0.00013950113498140126,
"learning_rate": 5.9758976238511895e-06,
"loss": 0.0,
"step": 884000
},
{
"epoch": 1.4119773030036245,
"grad_norm": 0.00010148427099920809,
"learning_rate": 5.960876122834338e-06,
"loss": 0.0,
"step": 884500
},
{
"epoch": 1.412775481241614,
"grad_norm": 0.00019023822096642107,
"learning_rate": 5.945868842263167e-06,
"loss": 0.0,
"step": 885000
},
{
"epoch": 1.4135736594796038,
"grad_norm": 0.0007381364703178406,
"learning_rate": 5.930875805747308e-06,
"loss": 0.0,
"step": 885500
},
{
"epoch": 1.4143718377175933,
"grad_norm": 0.00023581883579026908,
"learning_rate": 5.915897036873949e-06,
"loss": 0.0,
"step": 886000
},
{
"epoch": 1.415170015955583,
"grad_norm": 0.00018272988381795585,
"learning_rate": 5.900932559207857e-06,
"loss": 0.0,
"step": 886500
},
{
"epoch": 1.4159681941935727,
"grad_norm": 0.00017383633530698717,
"learning_rate": 5.885982396291304e-06,
"loss": 0.0,
"step": 887000
},
{
"epoch": 1.4167663724315622,
"grad_norm": 0.00023192846856545657,
"learning_rate": 5.871046571644052e-06,
"loss": 0.0,
"step": 887500
},
{
"epoch": 1.4175645506695518,
"grad_norm": 0.00807888526469469,
"learning_rate": 5.8561251087632925e-06,
"loss": 0.0,
"step": 888000
},
{
"epoch": 1.4183627289075416,
"grad_norm": 0.00855404045432806,
"learning_rate": 5.84121803112362e-06,
"loss": 0.0,
"step": 888500
},
{
"epoch": 1.4191609071455311,
"grad_norm": 0.0001990912714973092,
"learning_rate": 5.826325362177028e-06,
"loss": 0.0,
"step": 889000
},
{
"epoch": 1.4199590853835207,
"grad_norm": 0.00013648335880134255,
"learning_rate": 5.811447125352806e-06,
"loss": 0.0,
"step": 889500
},
{
"epoch": 1.4207572636215102,
"grad_norm": 0.0002360754006076604,
"learning_rate": 5.796583344057563e-06,
"loss": 0.0,
"step": 890000
},
{
"epoch": 1.4215554418594998,
"grad_norm": 0.00024741183733567595,
"learning_rate": 5.781734041675143e-06,
"loss": 0.0,
"step": 890500
},
{
"epoch": 1.4223536200974896,
"grad_norm": 0.0004946837434545159,
"learning_rate": 5.76689924156665e-06,
"loss": 0.0,
"step": 891000
},
{
"epoch": 1.4231517983354791,
"grad_norm": 0.00025075749726966023,
"learning_rate": 5.752078967070334e-06,
"loss": 0.0,
"step": 891500
},
{
"epoch": 1.4239499765734687,
"grad_norm": 0.00029719286249019206,
"learning_rate": 5.737273241501599e-06,
"loss": 0.0,
"step": 892000
},
{
"epoch": 1.4247481548114584,
"grad_norm": 0.0014194652903825045,
"learning_rate": 5.722482088152992e-06,
"loss": 0.0,
"step": 892500
},
{
"epoch": 1.425546333049448,
"grad_norm": 0.00022001670731697232,
"learning_rate": 5.7077055302940966e-06,
"loss": 0.0,
"step": 893000
},
{
"epoch": 1.4263445112874376,
"grad_norm": 0.00024666590616106987,
"learning_rate": 5.692943591171561e-06,
"loss": 0.0,
"step": 893500
},
{
"epoch": 1.427142689525427,
"grad_norm": 0.0002910353650804609,
"learning_rate": 5.6781962940090146e-06,
"loss": 0.0,
"step": 894000
},
{
"epoch": 1.4279408677634167,
"grad_norm": 2379.8115234375,
"learning_rate": 5.663463662007065e-06,
"loss": 0.0,
"step": 894500
},
{
"epoch": 1.4287390460014064,
"grad_norm": 0.00021346789435483515,
"learning_rate": 5.6487457183432515e-06,
"loss": 0.0,
"step": 895000
},
{
"epoch": 1.429537224239396,
"grad_norm": 0.00022312205692287534,
"learning_rate": 5.634042486171992e-06,
"loss": 0.0,
"step": 895500
},
{
"epoch": 1.4303354024773856,
"grad_norm": 0.0034749663900583982,
"learning_rate": 5.6193539886245685e-06,
"loss": 0.0,
"step": 896000
},
{
"epoch": 1.4311335807153753,
"grad_norm": 0.0002445397840347141,
"learning_rate": 5.604680248809083e-06,
"loss": 0.0,
"step": 896500
},
{
"epoch": 1.4319317589533649,
"grad_norm": 0.00021094012481626123,
"learning_rate": 5.590021289810424e-06,
"loss": 0.0,
"step": 897000
},
{
"epoch": 1.4327299371913544,
"grad_norm": 0.0002627313369885087,
"learning_rate": 5.575377134690211e-06,
"loss": 0.0,
"step": 897500
},
{
"epoch": 1.433528115429344,
"grad_norm": 0.0008308569085784256,
"learning_rate": 5.56074780648679e-06,
"loss": 0.0,
"step": 898000
},
{
"epoch": 1.4343262936673336,
"grad_norm": 0.0001924873940879479,
"learning_rate": 5.54613332821518e-06,
"loss": 0.0,
"step": 898500
},
{
"epoch": 1.4351244719053233,
"grad_norm": 0.0002441892575006932,
"learning_rate": 5.531533722867024e-06,
"loss": 0.0,
"step": 899000
},
{
"epoch": 1.4359226501433129,
"grad_norm": 0.0004263210576027632,
"learning_rate": 5.51694901341058e-06,
"loss": 0.0,
"step": 899500
},
{
"epoch": 1.4367208283813024,
"grad_norm": 0.0001925066317198798,
"learning_rate": 5.50237922279067e-06,
"loss": 0.0,
"step": 900000
},
{
"epoch": 1.4367208283813024,
"eval_loss": 1.5834675650694408e-05,
"eval_runtime": 25429.0967,
"eval_samples_per_second": 87.588,
"eval_steps_per_second": 2.737,
"step": 900000
},
{
"epoch": 1.4375190066192922,
"grad_norm": 0.00017933818162418902,
"learning_rate": 5.487824373928646e-06,
"loss": 0.0,
"step": 900500
},
{
"epoch": 1.4383171848572818,
"grad_norm": 0.00016729129129089415,
"learning_rate": 5.473284489722342e-06,
"loss": 0.0,
"step": 901000
},
{
"epoch": 1.4391153630952713,
"grad_norm": 0.00021027770708315074,
"learning_rate": 5.458759593046065e-06,
"loss": 0.0,
"step": 901500
},
{
"epoch": 1.4399135413332609,
"grad_norm": 0.00024220098566729575,
"learning_rate": 5.444249706750537e-06,
"loss": 0.0,
"step": 902000
},
{
"epoch": 1.4407117195712507,
"grad_norm": 0.0009675936307758093,
"learning_rate": 5.42975485366286e-06,
"loss": 0.0,
"step": 902500
},
{
"epoch": 1.4415098978092402,
"grad_norm": 0.002770686289295554,
"learning_rate": 5.415275056586492e-06,
"loss": 0.0,
"step": 903000
},
{
"epoch": 1.4423080760472298,
"grad_norm": 0.00026132797938771546,
"learning_rate": 5.400810338301212e-06,
"loss": 0.0,
"step": 903500
},
{
"epoch": 1.4431062542852193,
"grad_norm": 0.0002459358365740627,
"learning_rate": 5.386360721563056e-06,
"loss": 0.0,
"step": 904000
},
{
"epoch": 1.443904432523209,
"grad_norm": 0.0004831771075259894,
"learning_rate": 5.371926229104321e-06,
"loss": 0.0,
"step": 904500
},
{
"epoch": 1.4447026107611987,
"grad_norm": 0.0001880936761153862,
"learning_rate": 5.357506883633503e-06,
"loss": 0.0,
"step": 905000
},
{
"epoch": 1.4455007889991882,
"grad_norm": 0.00023097256780602038,
"learning_rate": 5.343102707835275e-06,
"loss": 0.0,
"step": 905500
},
{
"epoch": 1.446298967237178,
"grad_norm": 0.00031300674891099334,
"learning_rate": 5.328713724370429e-06,
"loss": 0.0,
"step": 906000
},
{
"epoch": 1.4470971454751675,
"grad_norm": 0.0001413863938068971,
"learning_rate": 5.314339955875871e-06,
"loss": 0.0,
"step": 906500
},
{
"epoch": 1.447895323713157,
"grad_norm": 0.00023202685406431556,
"learning_rate": 5.299981424964573e-06,
"loss": 0.0,
"step": 907000
},
{
"epoch": 1.4486935019511467,
"grad_norm": 0.023249566555023193,
"learning_rate": 5.285638154225516e-06,
"loss": 0.0,
"step": 907500
},
{
"epoch": 1.4494916801891362,
"grad_norm": 0.00018502937746234238,
"learning_rate": 5.271310166223693e-06,
"loss": 0.0,
"step": 908000
},
{
"epoch": 1.450289858427126,
"grad_norm": 0.0005356152541935444,
"learning_rate": 5.256997483500046e-06,
"loss": 0.0,
"step": 908500
},
{
"epoch": 1.4510880366651155,
"grad_norm": 0.0037611278239637613,
"learning_rate": 5.242700128571443e-06,
"loss": 0.0,
"step": 909000
},
{
"epoch": 1.451886214903105,
"grad_norm": 0.0002823321265168488,
"learning_rate": 5.2284181239306296e-06,
"loss": 0.0,
"step": 909500
},
{
"epoch": 1.4526843931410949,
"grad_norm": 0.0002303695655427873,
"learning_rate": 5.214151492046206e-06,
"loss": 0.0,
"step": 910000
},
{
"epoch": 1.4534825713790844,
"grad_norm": 0.00018809006724040955,
"learning_rate": 5.199900255362598e-06,
"loss": 0.0,
"step": 910500
},
{
"epoch": 1.454280749617074,
"grad_norm": 0.0005356586189009249,
"learning_rate": 5.18566443629999e-06,
"loss": 0.0,
"step": 911000
},
{
"epoch": 1.4550789278550635,
"grad_norm": 0.00024188545648939908,
"learning_rate": 5.171444057254331e-06,
"loss": 0.0,
"step": 911500
},
{
"epoch": 1.455877106093053,
"grad_norm": 0.0004672040231525898,
"learning_rate": 5.15723914059727e-06,
"loss": 0.0,
"step": 912000
},
{
"epoch": 1.4566752843310429,
"grad_norm": 0.00026381740462966263,
"learning_rate": 5.14304970867614e-06,
"loss": 0.0,
"step": 912500
},
{
"epoch": 1.4574734625690324,
"grad_norm": 0.00014812721929047257,
"learning_rate": 5.1288757838138945e-06,
"loss": 0.0,
"step": 913000
},
{
"epoch": 1.458271640807022,
"grad_norm": 0.0001770323870005086,
"learning_rate": 5.114717388309109e-06,
"loss": 0.0,
"step": 913500
},
{
"epoch": 1.4590698190450118,
"grad_norm": 0.00019775221880991012,
"learning_rate": 5.100574544435927e-06,
"loss": 0.0,
"step": 914000
},
{
"epoch": 1.4598679972830013,
"grad_norm": 0.00034710581530816853,
"learning_rate": 5.086447274444008e-06,
"loss": 0.0,
"step": 914500
},
{
"epoch": 1.4606661755209909,
"grad_norm": 0.00025523340445943177,
"learning_rate": 5.072335600558529e-06,
"loss": 0.0,
"step": 915000
},
{
"epoch": 1.4614643537589804,
"grad_norm": 0.00015108758816495538,
"learning_rate": 5.058239544980128e-06,
"loss": 0.0,
"step": 915500
},
{
"epoch": 1.46226253199697,
"grad_norm": 0.00018012001237366349,
"learning_rate": 5.044159129884872e-06,
"loss": 0.0,
"step": 916000
},
{
"epoch": 1.4630607102349598,
"grad_norm": 0.0001702363369986415,
"learning_rate": 5.030094377424215e-06,
"loss": 0.0,
"step": 916500
},
{
"epoch": 1.4638588884729493,
"grad_norm": 0.00013683621364179999,
"learning_rate": 5.016045309724964e-06,
"loss": 0.0,
"step": 917000
},
{
"epoch": 1.4646570667109389,
"grad_norm": 0.0001706535113044083,
"learning_rate": 5.002011948889283e-06,
"loss": 0.0,
"step": 917500
},
{
"epoch": 1.4654552449489286,
"grad_norm": 0.0001231006026500836,
"learning_rate": 4.987994316994589e-06,
"loss": 0.0,
"step": 918000
},
{
"epoch": 1.4662534231869182,
"grad_norm": 0.00028214300982654095,
"learning_rate": 4.973992436093574e-06,
"loss": 0.0,
"step": 918500
},
{
"epoch": 1.4670516014249078,
"grad_norm": 0.00048577613779343665,
"learning_rate": 4.960006328214146e-06,
"loss": 0.0,
"step": 919000
},
{
"epoch": 1.4678497796628975,
"grad_norm": 0.0019849666859954596,
"learning_rate": 4.946036015359403e-06,
"loss": 0.0,
"step": 919500
},
{
"epoch": 1.468647957900887,
"grad_norm": 0.0016200316604226828,
"learning_rate": 4.9320815195075855e-06,
"loss": 0.0,
"step": 920000
},
{
"epoch": 1.4694461361388766,
"grad_norm": 0.0004031170974485576,
"learning_rate": 4.918142862612043e-06,
"loss": 0.0,
"step": 920500
},
{
"epoch": 1.4702443143768662,
"grad_norm": 0.00026207268820144236,
"learning_rate": 4.904220066601242e-06,
"loss": 0.0,
"step": 921000
},
{
"epoch": 1.4710424926148558,
"grad_norm": 0.00015835427620913833,
"learning_rate": 4.890313153378654e-06,
"loss": 0.0,
"step": 921500
},
{
"epoch": 1.4718406708528455,
"grad_norm": 0.0002738400362432003,
"learning_rate": 4.8764221448227946e-06,
"loss": 0.0,
"step": 922000
},
{
"epoch": 1.472638849090835,
"grad_norm": 0.00012282826355658472,
"learning_rate": 4.86254706278713e-06,
"loss": 0.0,
"step": 922500
},
{
"epoch": 1.4734370273288246,
"grad_norm": 0.001346366130746901,
"learning_rate": 4.848687929100107e-06,
"loss": 0.0,
"step": 923000
},
{
"epoch": 1.4742352055668144,
"grad_norm": 0.00027137529104948044,
"learning_rate": 4.834844765565053e-06,
"loss": 0.0,
"step": 923500
},
{
"epoch": 1.475033383804804,
"grad_norm": 0.00016544785466976464,
"learning_rate": 4.821017593960169e-06,
"loss": 0.0,
"step": 924000
},
{
"epoch": 1.4758315620427935,
"grad_norm": 0.0002261428744532168,
"learning_rate": 4.8072064360385285e-06,
"loss": 0.0,
"step": 924500
},
{
"epoch": 1.476629740280783,
"grad_norm": 0.00015417086251545697,
"learning_rate": 4.7934113135279755e-06,
"loss": 0.0,
"step": 925000
},
{
"epoch": 1.4774279185187726,
"grad_norm": 0.00016388327640015632,
"learning_rate": 4.779632248131156e-06,
"loss": 0.0,
"step": 925500
},
{
"epoch": 1.4782260967567624,
"grad_norm": 0.00015743187395855784,
"learning_rate": 4.76586926152543e-06,
"loss": 0.0,
"step": 926000
},
{
"epoch": 1.479024274994752,
"grad_norm": 0.00023688154760748148,
"learning_rate": 4.752122375362879e-06,
"loss": 0.0,
"step": 926500
},
{
"epoch": 1.4798224532327415,
"grad_norm": 0.00017375938477925956,
"learning_rate": 4.7383916112702564e-06,
"loss": 0.0,
"step": 927000
},
{
"epoch": 1.4806206314707313,
"grad_norm": 0.00015891625662334263,
"learning_rate": 4.724676990848932e-06,
"loss": 0.0,
"step": 927500
},
{
"epoch": 1.4814188097087209,
"grad_norm": 0.00025687378365546465,
"learning_rate": 4.710978535674908e-06,
"loss": 0.0,
"step": 928000
},
{
"epoch": 1.4822169879467104,
"grad_norm": 0.010246257297694683,
"learning_rate": 4.69729626729873e-06,
"loss": 0.0,
"step": 928500
},
{
"epoch": 1.4830151661847,
"grad_norm": 0.0002825473784469068,
"learning_rate": 4.683630207245494e-06,
"loss": 0.0,
"step": 929000
},
{
"epoch": 1.4838133444226895,
"grad_norm": 0.00028282523271627724,
"learning_rate": 4.669980377014784e-06,
"loss": 0.0,
"step": 929500
},
{
"epoch": 1.4846115226606793,
"grad_norm": 0.00032856714096851647,
"learning_rate": 4.65634679808066e-06,
"loss": 0.0,
"step": 930000
},
{
"epoch": 1.4854097008986689,
"grad_norm": 0.00027473040972836316,
"learning_rate": 4.642729491891618e-06,
"loss": 0.0,
"step": 930500
},
{
"epoch": 1.4862078791366584,
"grad_norm": 0.0007247019093483686,
"learning_rate": 4.629128479870542e-06,
"loss": 0.0,
"step": 931000
},
{
"epoch": 1.4870060573746482,
"grad_norm": 0.0002671232505235821,
"learning_rate": 4.6155437834146915e-06,
"loss": 0.0,
"step": 931500
},
{
"epoch": 1.4878042356126377,
"grad_norm": 0.00021676292817573994,
"learning_rate": 4.601975423895655e-06,
"loss": 0.0,
"step": 932000
},
{
"epoch": 1.4886024138506273,
"grad_norm": 0.00023404511739499867,
"learning_rate": 4.588423422659326e-06,
"loss": 0.0,
"step": 932500
},
{
"epoch": 1.4894005920886169,
"grad_norm": 0.0002494192449375987,
"learning_rate": 4.574887801025846e-06,
"loss": 0.0,
"step": 933000
},
{
"epoch": 1.4901987703266064,
"grad_norm": 0.00022211413306649774,
"learning_rate": 4.561368580289603e-06,
"loss": 0.0,
"step": 933500
},
{
"epoch": 1.4909969485645962,
"grad_norm": 0.00018211059796158224,
"learning_rate": 4.547865781719185e-06,
"loss": 0.0,
"step": 934000
},
{
"epoch": 1.4917951268025857,
"grad_norm": 0.00020940111426170915,
"learning_rate": 4.534379426557327e-06,
"loss": 0.0,
"step": 934500
},
{
"epoch": 1.4925933050405753,
"grad_norm": 0.00019367334607522935,
"learning_rate": 4.520909536020911e-06,
"loss": 0.0,
"step": 935000
},
{
"epoch": 1.493391483278565,
"grad_norm": 0.0001935142936417833,
"learning_rate": 4.5074561313009185e-06,
"loss": 0.0,
"step": 935500
},
{
"epoch": 1.4941896615165546,
"grad_norm": 0.00019816748681478202,
"learning_rate": 4.494019233562378e-06,
"loss": 0.0,
"step": 936000
},
{
"epoch": 1.4949878397545442,
"grad_norm": 0.00024318444775417447,
"learning_rate": 4.480598863944364e-06,
"loss": 0.0,
"step": 936500
},
{
"epoch": 1.495786017992534,
"grad_norm": 0.0001876988826552406,
"learning_rate": 4.467195043559946e-06,
"loss": 0.0,
"step": 937000
},
{
"epoch": 1.4965841962305235,
"grad_norm": 0.0003371692728251219,
"learning_rate": 4.453807793496158e-06,
"loss": 0.0,
"step": 937500
},
{
"epoch": 1.497382374468513,
"grad_norm": 0.0003051517123822123,
"learning_rate": 4.440437134813959e-06,
"loss": 0.0,
"step": 938000
},
{
"epoch": 1.4981805527065026,
"grad_norm": 0.0002651779795996845,
"learning_rate": 4.427083088548214e-06,
"loss": 0.0,
"step": 938500
},
{
"epoch": 1.4989787309444922,
"grad_norm": 0.004931437782943249,
"learning_rate": 4.413745675707652e-06,
"loss": 0.0,
"step": 939000
},
{
"epoch": 1.499776909182482,
"grad_norm": 0.00023486409918405116,
"learning_rate": 4.40042491727483e-06,
"loss": 0.0,
"step": 939500
},
{
"epoch": 1.5005750874204715,
"grad_norm": 0.0002614731201902032,
"learning_rate": 4.387120834206106e-06,
"loss": 0.0,
"step": 940000
},
{
"epoch": 1.501373265658461,
"grad_norm": 0.0002206834906246513,
"learning_rate": 4.373833447431606e-06,
"loss": 0.0,
"step": 940500
},
{
"epoch": 1.5021714438964509,
"grad_norm": 0.00020592297369148582,
"learning_rate": 4.360562777855192e-06,
"loss": 0.0,
"step": 941000
},
{
"epoch": 1.5029696221344404,
"grad_norm": 0.00018462153093423694,
"learning_rate": 4.3473088463544155e-06,
"loss": 0.0,
"step": 941500
},
{
"epoch": 1.50376780037243,
"grad_norm": 0.0002245952346129343,
"learning_rate": 4.334071673780505e-06,
"loss": 0.0,
"step": 942000
},
{
"epoch": 1.5045659786104197,
"grad_norm": 0.0001867782702902332,
"learning_rate": 4.320851280958325e-06,
"loss": 0.0,
"step": 942500
},
{
"epoch": 1.505364156848409,
"grad_norm": 0.00026211480144411325,
"learning_rate": 4.307647688686331e-06,
"loss": 0.0,
"step": 943000
},
{
"epoch": 1.5061623350863989,
"grad_norm": 0.003524922300130129,
"learning_rate": 4.294460917736556e-06,
"loss": 0.0,
"step": 943500
},
{
"epoch": 1.5069605133243884,
"grad_norm": 0.00023060395324137062,
"learning_rate": 4.281290988854572e-06,
"loss": 0.0,
"step": 944000
},
{
"epoch": 1.507758691562378,
"grad_norm": 0.00018296600319445133,
"learning_rate": 4.268137922759453e-06,
"loss": 0.0,
"step": 944500
},
{
"epoch": 1.5085568698003677,
"grad_norm": 0.0002622704196255654,
"learning_rate": 4.255001740143732e-06,
"loss": 0.0,
"step": 945000
},
{
"epoch": 1.5093550480383573,
"grad_norm": 0.0001771461102180183,
"learning_rate": 4.2418824616733995e-06,
"loss": 0.0,
"step": 945500
},
{
"epoch": 1.5101532262763468,
"grad_norm": 0.0009799576364457607,
"learning_rate": 4.228780107987845e-06,
"loss": 0.0,
"step": 946000
},
{
"epoch": 1.5109514045143366,
"grad_norm": 0.00039355512126348913,
"learning_rate": 4.215694699699823e-06,
"loss": 0.0,
"step": 946500
},
{
"epoch": 1.511749582752326,
"grad_norm": 0.00020366064563859254,
"learning_rate": 4.202626257395441e-06,
"loss": 0.0,
"step": 947000
},
{
"epoch": 1.5125477609903157,
"grad_norm": 0.0002536962565500289,
"learning_rate": 4.189574801634108e-06,
"loss": 0.0,
"step": 947500
},
{
"epoch": 1.5133459392283053,
"grad_norm": 0.00019765354227274656,
"learning_rate": 4.176540352948524e-06,
"loss": 0.0,
"step": 948000
},
{
"epoch": 1.5141441174662948,
"grad_norm": 0.00039374298648908734,
"learning_rate": 4.1635229318446124e-06,
"loss": 0.0,
"step": 948500
},
{
"epoch": 1.5149422957042846,
"grad_norm": 0.00021904372260905802,
"learning_rate": 4.150522558801511e-06,
"loss": 0.0,
"step": 949000
},
{
"epoch": 1.5157404739422742,
"grad_norm": 0.00021933818061370403,
"learning_rate": 4.137539254271564e-06,
"loss": 0.0,
"step": 949500
},
{
"epoch": 1.5165386521802637,
"grad_norm": 0.010109743103384972,
"learning_rate": 4.124573038680228e-06,
"loss": 0.0,
"step": 950000
},
{
"epoch": 1.5173368304182535,
"grad_norm": 0.00024012771609704942,
"learning_rate": 4.111623932426095e-06,
"loss": 0.0,
"step": 950500
},
{
"epoch": 1.5181350086562428,
"grad_norm": 0.0003029144718311727,
"learning_rate": 4.0986919558808405e-06,
"loss": 0.0,
"step": 951000
},
{
"epoch": 1.5189331868942326,
"grad_norm": 0.0001967994321603328,
"learning_rate": 4.085777129389188e-06,
"loss": 0.0,
"step": 951500
},
{
"epoch": 1.5197313651322222,
"grad_norm": 0.00030280096689239144,
"learning_rate": 4.072879473268879e-06,
"loss": 0.0,
"step": 952000
},
{
"epoch": 1.5205295433702117,
"grad_norm": 0.0005742062348872423,
"learning_rate": 4.05999900781063e-06,
"loss": 0.0,
"step": 952500
},
{
"epoch": 1.5213277216082015,
"grad_norm": 0.00022738358529750258,
"learning_rate": 4.047135753278146e-06,
"loss": 0.0,
"step": 953000
},
{
"epoch": 1.522125899846191,
"grad_norm": 0.0001853039429988712,
"learning_rate": 4.034289729908022e-06,
"loss": 0.0,
"step": 953500
},
{
"epoch": 1.5229240780841806,
"grad_norm": 0.0002849490556400269,
"learning_rate": 4.021460957909764e-06,
"loss": 0.0,
"step": 954000
},
{
"epoch": 1.5237222563221704,
"grad_norm": 0.0005084871663711965,
"learning_rate": 4.008649457465735e-06,
"loss": 0.0,
"step": 954500
},
{
"epoch": 1.5245204345601597,
"grad_norm": 0.00028648623265326023,
"learning_rate": 3.995855248731117e-06,
"loss": 0.0,
"step": 955000
},
{
"epoch": 1.5253186127981495,
"grad_norm": 0.0002624321496114135,
"learning_rate": 3.9830783518339005e-06,
"loss": 0.0,
"step": 955500
},
{
"epoch": 1.526116791036139,
"grad_norm": 0.0007837973535060883,
"learning_rate": 3.970318786874825e-06,
"loss": 0.0,
"step": 956000
},
{
"epoch": 1.5269149692741286,
"grad_norm": 0.0002491927589289844,
"learning_rate": 3.95757657392739e-06,
"loss": 0.0,
"step": 956500
},
{
"epoch": 1.5277131475121184,
"grad_norm": 0.0007153134210966527,
"learning_rate": 3.944851733037768e-06,
"loss": 0.0,
"step": 957000
},
{
"epoch": 1.528511325750108,
"grad_norm": 0.00027355499332770705,
"learning_rate": 3.93214428422482e-06,
"loss": 0.0,
"step": 957500
},
{
"epoch": 1.5293095039880975,
"grad_norm": 0.0003587014216464013,
"learning_rate": 3.919454247480034e-06,
"loss": 0.0,
"step": 958000
},
{
"epoch": 1.5301076822260873,
"grad_norm": 0.0002984220045618713,
"learning_rate": 3.906781642767514e-06,
"loss": 0.0,
"step": 958500
},
{
"epoch": 1.5309058604640768,
"grad_norm": 0.04637977480888367,
"learning_rate": 3.8941264900239396e-06,
"loss": 0.0,
"step": 959000
},
{
"epoch": 1.5317040387020664,
"grad_norm": 0.0003292471228633076,
"learning_rate": 3.881488809158518e-06,
"loss": 0.0,
"step": 959500
},
{
"epoch": 1.5325022169400562,
"grad_norm": 0.000271444208920002,
"learning_rate": 3.8688686200530035e-06,
"loss": 0.0,
"step": 960000
},
{
"epoch": 1.5333003951780455,
"grad_norm": 0.0002163940080208704,
"learning_rate": 3.856265942561596e-06,
"loss": 0.0,
"step": 960500
},
{
"epoch": 1.5340985734160353,
"grad_norm": 0.00026649428764358163,
"learning_rate": 3.843680796510972e-06,
"loss": 0.0,
"step": 961000
},
{
"epoch": 1.5348967516540248,
"grad_norm": 0.0002405370760243386,
"learning_rate": 3.831113201700205e-06,
"loss": 0.0,
"step": 961500
},
{
"epoch": 1.5356949298920144,
"grad_norm": 0.0004005729279015213,
"learning_rate": 3.818563177900777e-06,
"loss": 0.0,
"step": 962000
},
{
"epoch": 1.5364931081300042,
"grad_norm": 0.0006678230129182339,
"learning_rate": 3.8060307448565195e-06,
"loss": 0.0,
"step": 962500
},
{
"epoch": 1.5372912863679937,
"grad_norm": 0.0003191915457136929,
"learning_rate": 3.7935159222835787e-06,
"loss": 0.0,
"step": 963000
},
{
"epoch": 1.5380894646059833,
"grad_norm": 0.0002546892501413822,
"learning_rate": 3.781018729870423e-06,
"loss": 0.0,
"step": 963500
},
{
"epoch": 1.538887642843973,
"grad_norm": 0.000205778909730725,
"learning_rate": 3.7685391872777607e-06,
"loss": 0.0,
"step": 964000
},
{
"epoch": 1.5396858210819624,
"grad_norm": 0.00045933053479529917,
"learning_rate": 3.756077314138534e-06,
"loss": 0.0,
"step": 964500
},
{
"epoch": 1.5404839993199522,
"grad_norm": 0.0007948831771500409,
"learning_rate": 3.7436331300579004e-06,
"loss": 0.0,
"step": 965000
},
{
"epoch": 1.5412821775579417,
"grad_norm": 0.0002442169061396271,
"learning_rate": 3.731206654613181e-06,
"loss": 0.0,
"step": 965500
},
{
"epoch": 1.5420803557959313,
"grad_norm": 0.00023657285782974213,
"learning_rate": 3.718797907353844e-06,
"loss": 0.0,
"step": 966000
},
{
"epoch": 1.542878534033921,
"grad_norm": 0.0002562176960054785,
"learning_rate": 3.7064069078014532e-06,
"loss": 0.0,
"step": 966500
},
{
"epoch": 1.5436767122719106,
"grad_norm": 0.00023075290664564818,
"learning_rate": 3.694033675449667e-06,
"loss": 0.0,
"step": 967000
},
{
"epoch": 1.5444748905099002,
"grad_norm": 0.00022441007604356855,
"learning_rate": 3.6816782297641884e-06,
"loss": 0.0,
"step": 967500
},
{
"epoch": 1.54527306874789,
"grad_norm": 0.28657254576683044,
"learning_rate": 3.6693405901827277e-06,
"loss": 0.0,
"step": 968000
},
{
"epoch": 1.5460712469858793,
"grad_norm": 0.0003447630733717233,
"learning_rate": 3.657020776114994e-06,
"loss": 0.0,
"step": 968500
},
{
"epoch": 1.546869425223869,
"grad_norm": 0.00013505498645827174,
"learning_rate": 3.6447188069426514e-06,
"loss": 0.0,
"step": 969000
},
{
"epoch": 1.5476676034618586,
"grad_norm": 0.00015060935402289033,
"learning_rate": 3.6324347020192904e-06,
"loss": 0.0,
"step": 969500
},
{
"epoch": 1.5484657816998482,
"grad_norm": 0.00021117663709446788,
"learning_rate": 3.6201684806703894e-06,
"loss": 0.0,
"step": 970000
},
{
"epoch": 1.549263959937838,
"grad_norm": 0.00036845364957116544,
"learning_rate": 3.6079201621933017e-06,
"loss": 0.0,
"step": 970500
},
{
"epoch": 1.5500621381758275,
"grad_norm": 0.00026166459429077804,
"learning_rate": 3.5956897658572136e-06,
"loss": 0.0,
"step": 971000
},
{
"epoch": 1.550860316413817,
"grad_norm": 0.0002735615707933903,
"learning_rate": 3.583477310903109e-06,
"loss": 0.0,
"step": 971500
},
{
"epoch": 1.5516584946518068,
"grad_norm": 0.00018594361608847976,
"learning_rate": 3.5712828165437557e-06,
"loss": 0.0,
"step": 972000
},
{
"epoch": 1.5524566728897962,
"grad_norm": 0.00013815864804200828,
"learning_rate": 3.559106301963661e-06,
"loss": 0.0,
"step": 972500
},
{
"epoch": 1.553254851127786,
"grad_norm": 0.001760584069415927,
"learning_rate": 3.5469477863190504e-06,
"loss": 0.0,
"step": 973000
},
{
"epoch": 1.5540530293657757,
"grad_norm": 0.00026064462144859135,
"learning_rate": 3.534807288737824e-06,
"loss": 0.0,
"step": 973500
},
{
"epoch": 1.554851207603765,
"grad_norm": 0.0005155335529707372,
"learning_rate": 3.522684828319543e-06,
"loss": 0.0,
"step": 974000
},
{
"epoch": 1.5556493858417548,
"grad_norm": 0.00018664480012375861,
"learning_rate": 3.510580424135396e-06,
"loss": 0.0,
"step": 974500
},
{
"epoch": 1.5564475640797444,
"grad_norm": 0.002446634229272604,
"learning_rate": 3.498494095228151e-06,
"loss": 0.0,
"step": 975000
},
{
"epoch": 1.557245742317734,
"grad_norm": 0.00043144013034179807,
"learning_rate": 3.486425860612157e-06,
"loss": 0.0,
"step": 975500
},
{
"epoch": 1.5580439205557237,
"grad_norm": 0.00019767590856645256,
"learning_rate": 3.474375739273284e-06,
"loss": 0.0,
"step": 976000
},
{
"epoch": 1.5588420987937133,
"grad_norm": 0.00025959816412068903,
"learning_rate": 3.4623437501689182e-06,
"loss": 0.0,
"step": 976500
},
{
"epoch": 1.5596402770317028,
"grad_norm": 0.00023775137378834188,
"learning_rate": 3.4503299122279013e-06,
"loss": 0.0,
"step": 977000
},
{
"epoch": 1.5604384552696926,
"grad_norm": 0.0002516016538720578,
"learning_rate": 3.4383342443505385e-06,
"loss": 0.0,
"step": 977500
},
{
"epoch": 1.561236633507682,
"grad_norm": 0.0003499962331261486,
"learning_rate": 3.426356765408545e-06,
"loss": 0.0,
"step": 978000
},
{
"epoch": 1.5620348117456717,
"grad_norm": 0.00022499705664813519,
"learning_rate": 3.414397494245008e-06,
"loss": 0.0,
"step": 978500
},
{
"epoch": 1.5628329899836613,
"grad_norm": 0.0003553772403392941,
"learning_rate": 3.4024564496743843e-06,
"loss": 0.0,
"step": 979000
},
{
"epoch": 1.5636311682216508,
"grad_norm": 0.00025216786889359355,
"learning_rate": 3.3905336504824537e-06,
"loss": 0.0,
"step": 979500
},
{
"epoch": 1.5644293464596406,
"grad_norm": 1341.220458984375,
"learning_rate": 3.3786291154262935e-06,
"loss": 0.0,
"step": 980000
},
{
"epoch": 1.5652275246976302,
"grad_norm": 0.003939106594771147,
"learning_rate": 3.3667428632342373e-06,
"loss": 0.0,
"step": 980500
},
{
"epoch": 1.5660257029356197,
"grad_norm": 0.00019107607658952475,
"learning_rate": 3.354874912605866e-06,
"loss": 0.0,
"step": 981000
},
{
"epoch": 1.5668238811736095,
"grad_norm": 0.0002252194390166551,
"learning_rate": 3.343025282211972e-06,
"loss": 0.0,
"step": 981500
},
{
"epoch": 1.5676220594115988,
"grad_norm": 0.00031818528077565134,
"learning_rate": 3.3311939906945094e-06,
"loss": 0.0,
"step": 982000
},
{
"epoch": 1.5684202376495886,
"grad_norm": 0.0002334948512725532,
"learning_rate": 3.319381056666595e-06,
"loss": 0.0,
"step": 982500
},
{
"epoch": 1.5692184158875782,
"grad_norm": 0.003088391851633787,
"learning_rate": 3.307586498712468e-06,
"loss": 0.0,
"step": 983000
},
{
"epoch": 1.5700165941255677,
"grad_norm": 0.00014293832646217197,
"learning_rate": 3.2958103353874445e-06,
"loss": 0.0,
"step": 983500
},
{
"epoch": 1.5708147723635575,
"grad_norm": 0.0005570229259319603,
"learning_rate": 3.2840525852179165e-06,
"loss": 0.0,
"step": 984000
},
{
"epoch": 1.571612950601547,
"grad_norm": 0.00026708198129199445,
"learning_rate": 3.272313266701291e-06,
"loss": 0.0,
"step": 984500
},
{
"epoch": 1.5724111288395366,
"grad_norm": 0.0002196293353335932,
"learning_rate": 3.260592398306002e-06,
"loss": 0.0,
"step": 985000
},
{
"epoch": 1.5732093070775264,
"grad_norm": 0.04503238573670387,
"learning_rate": 3.2488899984714326e-06,
"loss": 0.0,
"step": 985500
},
{
"epoch": 1.5740074853155157,
"grad_norm": 0.00016815183334983885,
"learning_rate": 3.2372060856079287e-06,
"loss": 0.0,
"step": 986000
},
{
"epoch": 1.5748056635535055,
"grad_norm": 0.00018650360289029777,
"learning_rate": 3.2255406780967488e-06,
"loss": 0.0,
"step": 986500
},
{
"epoch": 1.575603841791495,
"grad_norm": 0.00018975707644131035,
"learning_rate": 3.213893794290029e-06,
"loss": 0.0,
"step": 987000
},
{
"epoch": 1.5764020200294846,
"grad_norm": 0.00021380094403866678,
"learning_rate": 3.2022654525107764e-06,
"loss": 0.0,
"step": 987500
},
{
"epoch": 1.5772001982674744,
"grad_norm": 0.0003533356648404151,
"learning_rate": 3.1906556710528117e-06,
"loss": 0.0,
"step": 988000
},
{
"epoch": 1.577998376505464,
"grad_norm": 0.0001912272855406627,
"learning_rate": 3.179064468180782e-06,
"loss": 0.0,
"step": 988500
},
{
"epoch": 1.5787965547434535,
"grad_norm": 0.00029489348526112735,
"learning_rate": 3.1674918621300764e-06,
"loss": 0.0,
"step": 989000
},
{
"epoch": 1.5795947329814433,
"grad_norm": 0.00020246152416802943,
"learning_rate": 3.1559378711068502e-06,
"loss": 0.0,
"step": 989500
},
{
"epoch": 1.5803929112194328,
"grad_norm": 0.0003821216232609004,
"learning_rate": 3.1444025132879654e-06,
"loss": 0.0,
"step": 990000
},
{
"epoch": 1.5811910894574224,
"grad_norm": 0.00018945671035908163,
"learning_rate": 3.132885806820962e-06,
"loss": 0.0,
"step": 990500
},
{
"epoch": 1.5819892676954121,
"grad_norm": 0.00016204801795538515,
"learning_rate": 3.1213877698240532e-06,
"loss": 0.0,
"step": 991000
},
{
"epoch": 1.5827874459334015,
"grad_norm": 0.000292215176159516,
"learning_rate": 3.1099084203860616e-06,
"loss": 0.0,
"step": 991500
},
{
"epoch": 1.5835856241713913,
"grad_norm": 0.0001693951344350353,
"learning_rate": 3.098447776566436e-06,
"loss": 0.0,
"step": 992000
},
{
"epoch": 1.5843838024093808,
"grad_norm": 0.00010817296424647793,
"learning_rate": 3.0870058563951768e-06,
"loss": 0.0,
"step": 992500
},
{
"epoch": 1.5851819806473704,
"grad_norm": 0.00021646858658641577,
"learning_rate": 3.0755826778728306e-06,
"loss": 0.0,
"step": 993000
},
{
"epoch": 1.5859801588853601,
"grad_norm": 0.0001887698017526418,
"learning_rate": 3.0641782589704655e-06,
"loss": 0.0,
"step": 993500
},
{
"epoch": 1.5867783371233497,
"grad_norm": 0.00021660560742020607,
"learning_rate": 3.052792617629634e-06,
"loss": 0.0,
"step": 994000
},
{
"epoch": 1.5875765153613393,
"grad_norm": 9.980924369301647e-05,
"learning_rate": 3.041425771762355e-06,
"loss": 0.0,
"step": 994500
},
{
"epoch": 1.588374693599329,
"grad_norm": 0.00025837685097940266,
"learning_rate": 3.0300777392510557e-06,
"loss": 0.0,
"step": 995000
},
{
"epoch": 1.5891728718373184,
"grad_norm": 0.00017442693933844566,
"learning_rate": 3.018748537948599e-06,
"loss": 0.0,
"step": 995500
},
{
"epoch": 1.5899710500753081,
"grad_norm": 0.00025473537971265614,
"learning_rate": 3.0074381856781974e-06,
"loss": 0.0,
"step": 996000
},
{
"epoch": 1.5907692283132977,
"grad_norm": 0.00018088742217514664,
"learning_rate": 2.9961467002334126e-06,
"loss": 0.0,
"step": 996500
},
{
"epoch": 1.5915674065512873,
"grad_norm": 0.00037461461033672094,
"learning_rate": 2.9848740993781313e-06,
"loss": 0.0,
"step": 997000
},
{
"epoch": 1.592365584789277,
"grad_norm": 0.002994926879182458,
"learning_rate": 2.9736204008465333e-06,
"loss": 0.0,
"step": 997500
},
{
"epoch": 1.5931637630272666,
"grad_norm": 0.00024818425299599767,
"learning_rate": 2.962385622343058e-06,
"loss": 0.0,
"step": 998000
},
{
"epoch": 1.5939619412652561,
"grad_norm": 0.0005606827326118946,
"learning_rate": 2.9511697815423698e-06,
"loss": 0.0,
"step": 998500
},
{
"epoch": 1.594760119503246,
"grad_norm": 0.0002748910628724843,
"learning_rate": 2.9399728960893537e-06,
"loss": 0.0,
"step": 999000
},
{
"epoch": 1.5955582977412353,
"grad_norm": 0.00022624792472925037,
"learning_rate": 2.928794983599071e-06,
"loss": 0.0,
"step": 999500
},
{
"epoch": 1.596356475979225,
"grad_norm": 0.0002598523278720677,
"learning_rate": 2.9176360616567267e-06,
"loss": 0.0,
"step": 1000000
},
{
"epoch": 1.5971546542172146,
"grad_norm": 0.0001525416737422347,
"learning_rate": 2.9064961478176584e-06,
"loss": 0.0,
"step": 1000500
},
{
"epoch": 1.5979528324552041,
"grad_norm": 0.00017040724924299866,
"learning_rate": 2.8953752596072976e-06,
"loss": 0.0,
"step": 1001000
},
{
"epoch": 1.598751010693194,
"grad_norm": 0.00020649759972002357,
"learning_rate": 2.884273414521146e-06,
"loss": 0.0,
"step": 1001500
},
{
"epoch": 1.5995491889311835,
"grad_norm": 0.000152139225974679,
"learning_rate": 2.8731906300247376e-06,
"loss": 0.0,
"step": 1002000
},
{
"epoch": 1.600347367169173,
"grad_norm": 0.00011175717372680083,
"learning_rate": 2.86212692355363e-06,
"loss": 0.0,
"step": 1002500
},
{
"epoch": 1.6011455454071628,
"grad_norm": 0.00022403241018764675,
"learning_rate": 2.851082312513368e-06,
"loss": 0.0,
"step": 1003000
},
{
"epoch": 1.6019437236451521,
"grad_norm": 0.0021336127538233995,
"learning_rate": 2.840056814279443e-06,
"loss": 0.0,
"step": 1003500
},
{
"epoch": 1.602741901883142,
"grad_norm": 0.00018766756693366915,
"learning_rate": 2.829050446197291e-06,
"loss": 0.0,
"step": 1004000
},
{
"epoch": 1.6035400801211317,
"grad_norm": 0.00015176778833847493,
"learning_rate": 2.818063225582246e-06,
"loss": 0.0,
"step": 1004500
},
{
"epoch": 1.604338258359121,
"grad_norm": 0.00027044734451919794,
"learning_rate": 2.8070951697195222e-06,
"loss": 0.0,
"step": 1005000
},
{
"epoch": 1.6051364365971108,
"grad_norm": 0.0003155279264319688,
"learning_rate": 2.7961462958641766e-06,
"loss": 0.0,
"step": 1005500
},
{
"epoch": 1.6059346148351004,
"grad_norm": 0.0001946605771081522,
"learning_rate": 2.785216621241098e-06,
"loss": 0.0,
"step": 1006000
},
{
"epoch": 1.60673279307309,
"grad_norm": 0.00022992221056483686,
"learning_rate": 2.774306163044969e-06,
"loss": 0.0,
"step": 1006500
},
{
"epoch": 1.6075309713110797,
"grad_norm": 0.000249813572736457,
"learning_rate": 2.7634149384402296e-06,
"loss": 0.0,
"step": 1007000
},
{
"epoch": 1.6083291495490692,
"grad_norm": 0.000225092371692881,
"learning_rate": 2.752542964561077e-06,
"loss": 0.0,
"step": 1007500
},
{
"epoch": 1.6091273277870588,
"grad_norm": 0.0002945462183561176,
"learning_rate": 2.7416902585114135e-06,
"loss": 0.0,
"step": 1008000
},
{
"epoch": 1.6099255060250486,
"grad_norm": 0.0001682073052506894,
"learning_rate": 2.7308568373648357e-06,
"loss": 0.0,
"step": 1008500
},
{
"epoch": 1.610723684263038,
"grad_norm": 0.22438447177410126,
"learning_rate": 2.7200427181645895e-06,
"loss": 0.0,
"step": 1009000
},
{
"epoch": 1.6115218625010277,
"grad_norm": 0.0002828448486980051,
"learning_rate": 2.7092479179235652e-06,
"loss": 0.0,
"step": 1009500
},
{
"epoch": 1.6123200407390172,
"grad_norm": 0.0001349742669845,
"learning_rate": 2.6984724536242637e-06,
"loss": 0.0,
"step": 1010000
},
{
"epoch": 1.6131182189770068,
"grad_norm": 0.0002548525226302445,
"learning_rate": 2.6877163422187483e-06,
"loss": 0.0,
"step": 1010500
},
{
"epoch": 1.6139163972149966,
"grad_norm": 0.00014282946358434856,
"learning_rate": 2.6769796006286544e-06,
"loss": 0.0,
"step": 1011000
},
{
"epoch": 1.6147145754529861,
"grad_norm": 0.00016806498751975596,
"learning_rate": 2.6662622457451408e-06,
"loss": 0.0,
"step": 1011500
},
{
"epoch": 1.6155127536909757,
"grad_norm": 0.00019143502868246287,
"learning_rate": 2.6555642944288565e-06,
"loss": 0.0,
"step": 1012000
},
{
"epoch": 1.6163109319289655,
"grad_norm": 0.00015487658674828708,
"learning_rate": 2.644885763509936e-06,
"loss": 0.0,
"step": 1012500
},
{
"epoch": 1.6171091101669548,
"grad_norm": 0.008766920305788517,
"learning_rate": 2.6342266697879573e-06,
"loss": 0.0,
"step": 1013000
},
{
"epoch": 1.6179072884049446,
"grad_norm": 0.00031943133217282593,
"learning_rate": 2.6235870300319237e-06,
"loss": 0.0,
"step": 1013500
},
{
"epoch": 1.6187054666429341,
"grad_norm": 0.00034817136474885046,
"learning_rate": 2.612966860980222e-06,
"loss": 0.0,
"step": 1014000
},
{
"epoch": 1.6195036448809237,
"grad_norm": 0.00019778979185502976,
"learning_rate": 2.6023661793406196e-06,
"loss": 0.0,
"step": 1014500
},
{
"epoch": 1.6203018231189135,
"grad_norm": 0.00024411575577687472,
"learning_rate": 2.5917850017902225e-06,
"loss": 0.0,
"step": 1015000
},
{
"epoch": 1.621100001356903,
"grad_norm": 0.00018817426462192088,
"learning_rate": 2.5812233449754465e-06,
"loss": 0.0,
"step": 1015500
},
{
"epoch": 1.6218981795948926,
"grad_norm": 0.00015945191262289882,
"learning_rate": 2.570681225512007e-06,
"loss": 0.0,
"step": 1016000
},
{
"epoch": 1.6226963578328824,
"grad_norm": 0.00012362716370262206,
"learning_rate": 2.5601586599848746e-06,
"loss": 0.0,
"step": 1016500
},
{
"epoch": 1.6234945360708717,
"grad_norm": 0.00016100883658509701,
"learning_rate": 2.5496556649482687e-06,
"loss": 0.0,
"step": 1017000
},
{
"epoch": 1.6242927143088615,
"grad_norm": 0.0001855713635450229,
"learning_rate": 2.539172256925602e-06,
"loss": 0.0,
"step": 1017500
},
{
"epoch": 1.625090892546851,
"grad_norm": 0.00019156381313223392,
"learning_rate": 2.52870845240949e-06,
"loss": 0.0,
"step": 1018000
},
{
"epoch": 1.6258890707848406,
"grad_norm": 0.00016442120249848813,
"learning_rate": 2.518264267861703e-06,
"loss": 0.0,
"step": 1018500
},
{
"epoch": 1.6266872490228304,
"grad_norm": 0.00023895353660918772,
"learning_rate": 2.507839719713134e-06,
"loss": 0.0,
"step": 1019000
},
{
"epoch": 1.62748542726082,
"grad_norm": 0.00017404610116500407,
"learning_rate": 2.497434824363805e-06,
"loss": 0.0,
"step": 1019500
},
{
"epoch": 1.6282836054988095,
"grad_norm": 0.0001925264805322513,
"learning_rate": 2.4870495981827933e-06,
"loss": 0.0,
"step": 1020000
},
{
"epoch": 1.6290817837367992,
"grad_norm": 0.00015991131658665836,
"learning_rate": 2.4766840575082617e-06,
"loss": 0.0,
"step": 1020500
},
{
"epoch": 1.6298799619747888,
"grad_norm": 0.00015943833568599075,
"learning_rate": 2.466338218647384e-06,
"loss": 0.0,
"step": 1021000
},
{
"epoch": 1.6306781402127783,
"grad_norm": 0.00015321993851102889,
"learning_rate": 2.4560120978763335e-06,
"loss": 0.0,
"step": 1021500
},
{
"epoch": 1.6314763184507681,
"grad_norm": 0.00015020738646853715,
"learning_rate": 2.4457057114402892e-06,
"loss": 0.0,
"step": 1022000
},
{
"epoch": 1.6322744966887575,
"grad_norm": 0.00021532770188059658,
"learning_rate": 2.435419075553358e-06,
"loss": 0.0,
"step": 1022500
},
{
"epoch": 1.6330726749267472,
"grad_norm": 0.00013064758968539536,
"learning_rate": 2.4251522063985893e-06,
"loss": 0.0,
"step": 1023000
},
{
"epoch": 1.6338708531647368,
"grad_norm": 0.00013219025277066976,
"learning_rate": 2.4149051201279213e-06,
"loss": 0.0,
"step": 1023500
},
{
"epoch": 1.6346690314027263,
"grad_norm": 0.00017550366465002298,
"learning_rate": 2.4046778328621945e-06,
"loss": 0.0,
"step": 1024000
},
{
"epoch": 1.6354672096407161,
"grad_norm": 0.00023099327518139035,
"learning_rate": 2.3944703606910757e-06,
"loss": 0.0,
"step": 1024500
},
{
"epoch": 1.6362653878787057,
"grad_norm": 0.00028397998539730906,
"learning_rate": 2.3842827196730633e-06,
"loss": 0.0,
"step": 1025000
},
{
"epoch": 1.6370635661166952,
"grad_norm": 0.00014657012070529163,
"learning_rate": 2.3741149258354766e-06,
"loss": 0.0,
"step": 1025500
},
{
"epoch": 1.637861744354685,
"grad_norm": 0.00023401924408972263,
"learning_rate": 2.363966995174387e-06,
"loss": 0.0,
"step": 1026000
},
{
"epoch": 1.6386599225926743,
"grad_norm": 0.00034546665847301483,
"learning_rate": 2.353838943654632e-06,
"loss": 0.0,
"step": 1026500
},
{
"epoch": 1.6394581008306641,
"grad_norm": 0.00020099164976272732,
"learning_rate": 2.3437307872097597e-06,
"loss": 0.0,
"step": 1027000
},
{
"epoch": 1.6402562790686537,
"grad_norm": 0.0002026653237408027,
"learning_rate": 2.333642541742044e-06,
"loss": 0.0,
"step": 1027500
},
{
"epoch": 1.6410544573066432,
"grad_norm": 0.00018300658848602325,
"learning_rate": 2.323574223122414e-06,
"loss": 0.0,
"step": 1028000
},
{
"epoch": 1.641852635544633,
"grad_norm": 0.00023787171812728047,
"learning_rate": 2.313525847190448e-06,
"loss": 0.0,
"step": 1028500
},
{
"epoch": 1.6426508137826226,
"grad_norm": 0.00020482360559981316,
"learning_rate": 2.303497429754365e-06,
"loss": 0.0,
"step": 1029000
},
{
"epoch": 1.6434489920206121,
"grad_norm": 0.0002030259493039921,
"learning_rate": 2.293488986590976e-06,
"loss": 0.0,
"step": 1029500
},
{
"epoch": 1.644247170258602,
"grad_norm": 0.000236693857004866,
"learning_rate": 2.2835005334456744e-06,
"loss": 0.0,
"step": 1030000
},
{
"epoch": 1.6450453484965912,
"grad_norm": 0.00017397591727785766,
"learning_rate": 2.273532086032394e-06,
"loss": 0.0,
"step": 1030500
},
{
"epoch": 1.645843526734581,
"grad_norm": 0.00017545593436807394,
"learning_rate": 2.2635836600336046e-06,
"loss": 0.0,
"step": 1031000
},
{
"epoch": 1.6466417049725706,
"grad_norm": 0.000387836538720876,
"learning_rate": 2.2536552711002804e-06,
"loss": 0.0,
"step": 1031500
},
{
"epoch": 1.6474398832105601,
"grad_norm": 3.9118099212646484,
"learning_rate": 2.243746934851859e-06,
"loss": 0.0,
"step": 1032000
},
{
"epoch": 1.64823806144855,
"grad_norm": 1.9383196830749512,
"learning_rate": 2.2338586668762464e-06,
"loss": 0.0,
"step": 1032500
},
{
"epoch": 1.6490362396865395,
"grad_norm": 0.000305307621601969,
"learning_rate": 2.2239904827297695e-06,
"loss": 0.0,
"step": 1033000
},
{
"epoch": 1.649834417924529,
"grad_norm": 0.00021406357700470835,
"learning_rate": 2.2141423979371645e-06,
"loss": 0.0,
"step": 1033500
},
{
"epoch": 1.6506325961625188,
"grad_norm": 0.0006000241846777499,
"learning_rate": 2.2043144279915356e-06,
"loss": 0.0,
"step": 1034000
},
{
"epoch": 1.6514307744005081,
"grad_norm": 0.00020043583936057985,
"learning_rate": 2.194506588354352e-06,
"loss": 0.0,
"step": 1034500
},
{
"epoch": 1.652228952638498,
"grad_norm": 0.01158731710165739,
"learning_rate": 2.1847188944554176e-06,
"loss": 0.0,
"step": 1035000
},
{
"epoch": 1.6530271308764877,
"grad_norm": 0.00017917039804160595,
"learning_rate": 2.174951361692825e-06,
"loss": 0.0,
"step": 1035500
},
{
"epoch": 1.653825309114477,
"grad_norm": 0.00020505704742390662,
"learning_rate": 2.165204005432968e-06,
"loss": 0.0,
"step": 1036000
},
{
"epoch": 1.6546234873524668,
"grad_norm": 0.00024057974223978817,
"learning_rate": 2.1554768410104898e-06,
"loss": 0.0,
"step": 1036500
},
{
"epoch": 1.6554216655904563,
"grad_norm": 0.0001608024467714131,
"learning_rate": 2.1457698837282726e-06,
"loss": 0.0,
"step": 1037000
},
{
"epoch": 1.656219843828446,
"grad_norm": 0.00022758333943784237,
"learning_rate": 2.1360831488573956e-06,
"loss": 0.0,
"step": 1037500
},
{
"epoch": 1.6570180220664357,
"grad_norm": 0.002283047651872039,
"learning_rate": 2.1264166516371374e-06,
"loss": 0.0,
"step": 1038000
},
{
"epoch": 1.6578162003044252,
"grad_norm": 0.00017407875566277653,
"learning_rate": 2.11677040727494e-06,
"loss": 0.0,
"step": 1038500
},
{
"epoch": 1.6586143785424148,
"grad_norm": 0.00016274578229058534,
"learning_rate": 2.107144430946367e-06,
"loss": 0.0,
"step": 1039000
},
{
"epoch": 1.6594125567804046,
"grad_norm": 0.00018639072368387133,
"learning_rate": 2.097538737795112e-06,
"loss": 0.0,
"step": 1039500
},
{
"epoch": 1.660210735018394,
"grad_norm": 0.0005455246428027749,
"learning_rate": 2.087953342932958e-06,
"loss": 0.0,
"step": 1040000
},
{
"epoch": 1.6610089132563837,
"grad_norm": 0.00024452278739772737,
"learning_rate": 2.0783882614397413e-06,
"loss": 0.0,
"step": 1040500
},
{
"epoch": 1.6618070914943732,
"grad_norm": 0.0030349683947861195,
"learning_rate": 2.068843508363353e-06,
"loss": 0.0,
"step": 1041000
},
{
"epoch": 1.6626052697323628,
"grad_norm": 0.00021702187950722873,
"learning_rate": 2.059319098719701e-06,
"loss": 0.0,
"step": 1041500
},
{
"epoch": 1.6634034479703526,
"grad_norm": 0.00023376916942652315,
"learning_rate": 2.0498150474926897e-06,
"loss": 0.0,
"step": 1042000
},
{
"epoch": 1.664201626208342,
"grad_norm": 0.00025968361296691,
"learning_rate": 2.040331369634189e-06,
"loss": 0.0,
"step": 1042500
},
{
"epoch": 1.6649998044463317,
"grad_norm": 0.00019714338122867048,
"learning_rate": 2.0308680800640227e-06,
"loss": 0.0,
"step": 1043000
},
{
"epoch": 1.6657979826843214,
"grad_norm": 0.00014786337851546705,
"learning_rate": 2.021425193669945e-06,
"loss": 0.0,
"step": 1043500
},
{
"epoch": 1.6665961609223108,
"grad_norm": 0.00020566130115184933,
"learning_rate": 2.0120027253075945e-06,
"loss": 0.0,
"step": 1044000
},
{
"epoch": 1.6673943391603006,
"grad_norm": 0.00031280418625101447,
"learning_rate": 2.0026006898005033e-06,
"loss": 0.0,
"step": 1044500
},
{
"epoch": 1.66819251739829,
"grad_norm": 0.0002165736659662798,
"learning_rate": 1.993219101940055e-06,
"loss": 0.0,
"step": 1045000
},
{
"epoch": 1.6689906956362797,
"grad_norm": 0.00018687658302951604,
"learning_rate": 1.983857976485464e-06,
"loss": 0.0,
"step": 1045500
},
{
"epoch": 1.6697888738742694,
"grad_norm": 0.0002107059262925759,
"learning_rate": 1.974517328163748e-06,
"loss": 0.0,
"step": 1046000
},
{
"epoch": 1.670587052112259,
"grad_norm": 0.00026908345171250403,
"learning_rate": 1.965197171669715e-06,
"loss": 0.0,
"step": 1046500
},
{
"epoch": 1.6713852303502486,
"grad_norm": 0.00020733063865918666,
"learning_rate": 1.9558975216659407e-06,
"loss": 0.0,
"step": 1047000
},
{
"epoch": 1.6721834085882383,
"grad_norm": 0.0003536621225066483,
"learning_rate": 1.946618392782725e-06,
"loss": 0.0,
"step": 1047500
},
{
"epoch": 1.6729815868262277,
"grad_norm": 0.0002418523363303393,
"learning_rate": 1.937359799618094e-06,
"loss": 0.0,
"step": 1048000
},
{
"epoch": 1.6737797650642174,
"grad_norm": 0.0007535194745287299,
"learning_rate": 1.928121756737766e-06,
"loss": 0.0,
"step": 1048500
},
{
"epoch": 1.674577943302207,
"grad_norm": 0.00025971242575906217,
"learning_rate": 1.918904278675132e-06,
"loss": 0.0,
"step": 1049000
},
{
"epoch": 1.6753761215401966,
"grad_norm": 0.0002389907167525962,
"learning_rate": 1.9097073799312237e-06,
"loss": 0.0,
"step": 1049500
},
{
"epoch": 1.6761742997781863,
"grad_norm": 0.00017279484018217772,
"learning_rate": 1.9005310749746907e-06,
"loss": 0.0,
"step": 1050000
},
{
"epoch": 1.6761742997781863,
"eval_loss": 1.5775514839333482e-05,
"eval_runtime": 22138.2637,
"eval_samples_per_second": 100.608,
"eval_steps_per_second": 3.144,
"step": 1050000
},
{
"epoch": 1.6769724780161759,
"grad_norm": 0.0001552966859890148,
"learning_rate": 1.8913753782418087e-06,
"loss": 0.0,
"step": 1050500
},
{
"epoch": 1.6777706562541654,
"grad_norm": 0.00013622870028484613,
"learning_rate": 1.8822403041364056e-06,
"loss": 0.0,
"step": 1051000
},
{
"epoch": 1.6785688344921552,
"grad_norm": 0.002185018267482519,
"learning_rate": 1.8731258670298823e-06,
"loss": 0.0,
"step": 1051500
},
{
"epoch": 1.6793670127301448,
"grad_norm": 0.0002003060217248276,
"learning_rate": 1.8640320812611672e-06,
"loss": 0.0,
"step": 1052000
},
{
"epoch": 1.6801651909681343,
"grad_norm": 0.00023303533089347184,
"learning_rate": 1.854958961136703e-06,
"loss": 0.0,
"step": 1052500
},
{
"epoch": 1.680963369206124,
"grad_norm": 0.00019897008314728737,
"learning_rate": 1.8459065209304165e-06,
"loss": 0.0,
"step": 1053000
},
{
"epoch": 1.6817615474441134,
"grad_norm": 0.0003193389857187867,
"learning_rate": 1.8368747748836963e-06,
"loss": 0.0,
"step": 1053500
},
{
"epoch": 1.6825597256821032,
"grad_norm": 0.00824870727956295,
"learning_rate": 1.8278637372053925e-06,
"loss": 0.0,
"step": 1054000
},
{
"epoch": 1.6833579039200928,
"grad_norm": 0.0001839359028963372,
"learning_rate": 1.818873422071759e-06,
"loss": 0.0,
"step": 1054500
},
{
"epoch": 1.6841560821580823,
"grad_norm": 0.0003090524405706674,
"learning_rate": 1.809903843626457e-06,
"loss": 0.0,
"step": 1055000
},
{
"epoch": 1.684954260396072,
"grad_norm": 0.0002031936019193381,
"learning_rate": 1.800955015980517e-06,
"loss": 0.0,
"step": 1055500
},
{
"epoch": 1.6857524386340617,
"grad_norm": 0.00020773948926944286,
"learning_rate": 1.7920269532123395e-06,
"loss": 0.0,
"step": 1056000
},
{
"epoch": 1.6865506168720512,
"grad_norm": 0.0002412260655546561,
"learning_rate": 1.7831196693676439e-06,
"loss": 0.0,
"step": 1056500
},
{
"epoch": 1.687348795110041,
"grad_norm": 0.00022422504844143987,
"learning_rate": 1.7742331784594556e-06,
"loss": 0.0,
"step": 1057000
},
{
"epoch": 1.6881469733480303,
"grad_norm": 0.00018461488070897758,
"learning_rate": 1.7653674944681103e-06,
"loss": 0.0,
"step": 1057500
},
{
"epoch": 1.68894515158602,
"grad_norm": 0.0002039273822447285,
"learning_rate": 1.756522631341184e-06,
"loss": 0.0,
"step": 1058000
},
{
"epoch": 1.6897433298240097,
"grad_norm": 0.0002371125592617318,
"learning_rate": 1.74769860299352e-06,
"loss": 0.0,
"step": 1058500
},
{
"epoch": 1.6905415080619992,
"grad_norm": 0.00026676972629502416,
"learning_rate": 1.7388954233071646e-06,
"loss": 0.0,
"step": 1059000
},
{
"epoch": 1.691339686299989,
"grad_norm": 0.00016791919188108295,
"learning_rate": 1.730113106131375e-06,
"loss": 0.0,
"step": 1059500
},
{
"epoch": 1.6921378645379785,
"grad_norm": 0.00033503255690447986,
"learning_rate": 1.721351665282593e-06,
"loss": 0.0,
"step": 1060000
},
{
"epoch": 1.692936042775968,
"grad_norm": 0.00023998318647500128,
"learning_rate": 1.7126111145444018e-06,
"loss": 0.0,
"step": 1060500
},
{
"epoch": 1.6937342210139579,
"grad_norm": 0.00032387388637289405,
"learning_rate": 1.703891467667531e-06,
"loss": 0.0,
"step": 1061000
},
{
"epoch": 1.6945323992519472,
"grad_norm": 0.00016454454453196377,
"learning_rate": 1.6951927383698241e-06,
"loss": 0.0,
"step": 1061500
},
{
"epoch": 1.695330577489937,
"grad_norm": 0.00031503697391599417,
"learning_rate": 1.6865149403362156e-06,
"loss": 0.0,
"step": 1062000
},
{
"epoch": 1.6961287557279265,
"grad_norm": 0.003650024998933077,
"learning_rate": 1.6778580872187039e-06,
"loss": 0.0,
"step": 1062500
},
{
"epoch": 1.696926933965916,
"grad_norm": 0.00024623217177577317,
"learning_rate": 1.6692221926363444e-06,
"loss": 0.0,
"step": 1063000
},
{
"epoch": 1.6977251122039059,
"grad_norm": 0.0002650288224685937,
"learning_rate": 1.6606072701752229e-06,
"loss": 0.0,
"step": 1063500
},
{
"epoch": 1.6985232904418954,
"grad_norm": 0.0002041991101577878,
"learning_rate": 1.6520133333884214e-06,
"loss": 0.0,
"step": 1064000
},
{
"epoch": 1.699321468679885,
"grad_norm": 0.000113544927444309,
"learning_rate": 1.643440395796013e-06,
"loss": 0.0,
"step": 1064500
},
{
"epoch": 1.7001196469178748,
"grad_norm": 0.0001603996497578919,
"learning_rate": 1.6348884708850348e-06,
"loss": 0.0,
"step": 1065000
},
{
"epoch": 1.700917825155864,
"grad_norm": 0.00020040127856191248,
"learning_rate": 1.6263575721094708e-06,
"loss": 0.0,
"step": 1065500
},
{
"epoch": 1.7017160033938539,
"grad_norm": 0.00025933951837942004,
"learning_rate": 1.6178477128902141e-06,
"loss": 0.0,
"step": 1066000
},
{
"epoch": 1.7025141816318434,
"grad_norm": 0.00019231809710618109,
"learning_rate": 1.6093589066150687e-06,
"loss": 0.0,
"step": 1066500
},
{
"epoch": 1.703312359869833,
"grad_norm": 0.00026155367959290743,
"learning_rate": 1.6008911666387189e-06,
"loss": 0.0,
"step": 1067000
},
{
"epoch": 1.7041105381078228,
"grad_norm": 0.00014199796714819968,
"learning_rate": 1.5924445062826948e-06,
"loss": 0.0,
"step": 1067500
},
{
"epoch": 1.7049087163458123,
"grad_norm": 285.30670166015625,
"learning_rate": 1.584018938835377e-06,
"loss": 0.0,
"step": 1068000
},
{
"epoch": 1.7057068945838019,
"grad_norm": 0.00035446975380182266,
"learning_rate": 1.575614477551961e-06,
"loss": 0.0,
"step": 1068500
},
{
"epoch": 1.7065050728217916,
"grad_norm": 0.00017485932039562613,
"learning_rate": 1.5672311356544284e-06,
"loss": 0.0,
"step": 1069000
},
{
"epoch": 1.7073032510597812,
"grad_norm": 0.00022567392443306744,
"learning_rate": 1.5588689263315426e-06,
"loss": 0.0,
"step": 1069500
},
{
"epoch": 1.7081014292977708,
"grad_norm": 0.00024282569938804954,
"learning_rate": 1.550527862738822e-06,
"loss": 0.0,
"step": 1070000
},
{
"epoch": 1.7088996075357605,
"grad_norm": 0.00038521605893038213,
"learning_rate": 1.54220795799852e-06,
"loss": 0.0,
"step": 1070500
},
{
"epoch": 1.7096977857737499,
"grad_norm": 0.0002367474662605673,
"learning_rate": 1.5339092251995912e-06,
"loss": 0.0,
"step": 1071000
},
{
"epoch": 1.7104959640117396,
"grad_norm": 0.0003218221536371857,
"learning_rate": 1.5256316773976941e-06,
"loss": 0.0,
"step": 1071500
},
{
"epoch": 1.7112941422497292,
"grad_norm": 0.0006056890706531703,
"learning_rate": 1.5173753276151586e-06,
"loss": 0.0,
"step": 1072000
},
{
"epoch": 1.7120923204877188,
"grad_norm": 0.00013564640539698303,
"learning_rate": 1.5091401888409546e-06,
"loss": 0.0,
"step": 1072500
},
{
"epoch": 1.7128904987257085,
"grad_norm": 0.00019917692407034338,
"learning_rate": 1.5009262740306951e-06,
"loss": 0.0,
"step": 1073000
},
{
"epoch": 1.713688676963698,
"grad_norm": 0.00017702036711852998,
"learning_rate": 1.4927335961065953e-06,
"loss": 0.0,
"step": 1073500
},
{
"epoch": 1.7144868552016876,
"grad_norm": 0.0002473437343724072,
"learning_rate": 1.4845621679574666e-06,
"loss": 0.0,
"step": 1074000
},
{
"epoch": 1.7152850334396774,
"grad_norm": 0.00022888657986186445,
"learning_rate": 1.4764120024386812e-06,
"loss": 0.0,
"step": 1074500
},
{
"epoch": 1.7160832116776668,
"grad_norm": 0.00025228134472854435,
"learning_rate": 1.4682831123721707e-06,
"loss": 0.0,
"step": 1075000
},
{
"epoch": 1.7168813899156565,
"grad_norm": 0.0002196329296566546,
"learning_rate": 1.460175510546392e-06,
"loss": 0.0,
"step": 1075500
},
{
"epoch": 1.717679568153646,
"grad_norm": 0.00019092884031124413,
"learning_rate": 1.4520892097163059e-06,
"loss": 0.0,
"step": 1076000
},
{
"epoch": 1.7184777463916356,
"grad_norm": 1346.5693359375,
"learning_rate": 1.4440242226033672e-06,
"loss": 0.0,
"step": 1076500
},
{
"epoch": 1.7192759246296254,
"grad_norm": 0.00036944085150025785,
"learning_rate": 1.4359805618955025e-06,
"loss": 0.0,
"step": 1077000
},
{
"epoch": 1.720074102867615,
"grad_norm": 0.0002491538762114942,
"learning_rate": 1.4279582402470853e-06,
"loss": 0.0,
"step": 1077500
},
{
"epoch": 1.7208722811056045,
"grad_norm": 0.00014698713493999094,
"learning_rate": 1.419957270278912e-06,
"loss": 0.0,
"step": 1078000
},
{
"epoch": 1.7216704593435943,
"grad_norm": 0.0001589061866980046,
"learning_rate": 1.4119776645781956e-06,
"loss": 0.0,
"step": 1078500
},
{
"epoch": 1.7224686375815836,
"grad_norm": 0.00016320293070748448,
"learning_rate": 1.4040194356985408e-06,
"loss": 0.0,
"step": 1079000
},
{
"epoch": 1.7232668158195734,
"grad_norm": 0.00017447932623326778,
"learning_rate": 1.3960825961599112e-06,
"loss": 0.0,
"step": 1079500
},
{
"epoch": 1.724064994057563,
"grad_norm": 0.00021619001927319914,
"learning_rate": 1.38816715844863e-06,
"loss": 0.0,
"step": 1080000
},
{
"epoch": 1.7248631722955525,
"grad_norm": 0.0002612106909509748,
"learning_rate": 1.380273135017348e-06,
"loss": 0.0,
"step": 1080500
},
{
"epoch": 1.7256613505335423,
"grad_norm": 0.0005082807037979364,
"learning_rate": 1.3724005382850296e-06,
"loss": 0.0,
"step": 1081000
},
{
"epoch": 1.7264595287715319,
"grad_norm": 0.00015604299551341683,
"learning_rate": 1.3645493806369258e-06,
"loss": 0.0,
"step": 1081500
},
{
"epoch": 1.7272577070095214,
"grad_norm": 0.0005650034872815013,
"learning_rate": 1.3567196744245531e-06,
"loss": 0.0,
"step": 1082000
},
{
"epoch": 1.7280558852475112,
"grad_norm": 0.00015410668856929988,
"learning_rate": 1.3489114319657014e-06,
"loss": 0.0,
"step": 1082500
},
{
"epoch": 1.7288540634855007,
"grad_norm": 0.00018081202870234847,
"learning_rate": 1.3411246655443715e-06,
"loss": 0.0,
"step": 1083000
},
{
"epoch": 1.7296522417234903,
"grad_norm": 0.0002297761384397745,
"learning_rate": 1.3333593874107908e-06,
"loss": 0.0,
"step": 1083500
},
{
"epoch": 1.73045041996148,
"grad_norm": 0.00018270351574756205,
"learning_rate": 1.3256156097813754e-06,
"loss": 0.0,
"step": 1084000
},
{
"epoch": 1.7312485981994694,
"grad_norm": 0.001780197722837329,
"learning_rate": 1.3178933448387237e-06,
"loss": 0.0,
"step": 1084500
},
{
"epoch": 1.7320467764374592,
"grad_norm": 0.00021518795983865857,
"learning_rate": 1.3101926047315826e-06,
"loss": 0.0,
"step": 1085000
},
{
"epoch": 1.7328449546754487,
"grad_norm": 0.00019899863400496542,
"learning_rate": 1.3025134015748296e-06,
"loss": 0.0,
"step": 1085500
},
{
"epoch": 1.7336431329134383,
"grad_norm": 0.00016011096886359155,
"learning_rate": 1.294855747449481e-06,
"loss": 0.0,
"step": 1086000
},
{
"epoch": 1.734441311151428,
"grad_norm": 0.00023628213966730982,
"learning_rate": 1.2872196544026332e-06,
"loss": 0.0,
"step": 1086500
},
{
"epoch": 1.7352394893894176,
"grad_norm": 0.0001332947431365028,
"learning_rate": 1.2796051344474718e-06,
"loss": 0.0,
"step": 1087000
},
{
"epoch": 1.7360376676274072,
"grad_norm": 0.0002323123480891809,
"learning_rate": 1.2720121995632362e-06,
"loss": 0.0,
"step": 1087500
},
{
"epoch": 1.736835845865397,
"grad_norm": 0.00017582009604666382,
"learning_rate": 1.2644408616952142e-06,
"loss": 0.0,
"step": 1088000
},
{
"epoch": 1.7376340241033863,
"grad_norm": 0.00015031747170723975,
"learning_rate": 1.2568911327547178e-06,
"loss": 0.0,
"step": 1088500
},
{
"epoch": 1.738432202341376,
"grad_norm": 0.00016457086894661188,
"learning_rate": 1.2493630246190546e-06,
"loss": 0.0,
"step": 1089000
},
{
"epoch": 1.7392303805793656,
"grad_norm": 0.0003954498388338834,
"learning_rate": 1.2418565491315325e-06,
"loss": 0.0,
"step": 1089500
},
{
"epoch": 1.7400285588173552,
"grad_norm": 0.0008840158116072416,
"learning_rate": 1.234371718101412e-06,
"loss": 0.0,
"step": 1090000
},
{
"epoch": 1.740826737055345,
"grad_norm": 0.00028518703766167164,
"learning_rate": 1.2269085433039135e-06,
"loss": 0.0,
"step": 1090500
},
{
"epoch": 1.7416249152933345,
"grad_norm": 0.0001815830619307235,
"learning_rate": 1.2194670364801785e-06,
"loss": 0.0,
"step": 1091000
},
{
"epoch": 1.742423093531324,
"grad_norm": 0.00017911636678036302,
"learning_rate": 1.2120472093372642e-06,
"loss": 0.0,
"step": 1091500
},
{
"epoch": 1.7432212717693139,
"grad_norm": 0.0002147218183381483,
"learning_rate": 1.204649073548128e-06,
"loss": 0.0,
"step": 1092000
},
{
"epoch": 1.7440194500073032,
"grad_norm": 0.00016766555199865252,
"learning_rate": 1.1972726407515848e-06,
"loss": 0.0,
"step": 1092500
},
{
"epoch": 1.744817628245293,
"grad_norm": 0.00023504404816776514,
"learning_rate": 1.1899179225523305e-06,
"loss": 0.0,
"step": 1093000
},
{
"epoch": 1.7456158064832825,
"grad_norm": 0.0001661064598010853,
"learning_rate": 1.182584930520874e-06,
"loss": 0.0,
"step": 1093500
},
{
"epoch": 1.746413984721272,
"grad_norm": 0.00013320970174390823,
"learning_rate": 1.175273676193566e-06,
"loss": 0.0,
"step": 1094000
},
{
"epoch": 1.7472121629592618,
"grad_norm": 0.00019263781723566353,
"learning_rate": 1.167984171072541e-06,
"loss": 0.0,
"step": 1094500
},
{
"epoch": 1.7480103411972514,
"grad_norm": 0.00018430198542773724,
"learning_rate": 1.1607164266257297e-06,
"loss": 0.0,
"step": 1095000
},
{
"epoch": 1.748808519435241,
"grad_norm": 0.000189875194337219,
"learning_rate": 1.1534704542868268e-06,
"loss": 0.0,
"step": 1095500
},
{
"epoch": 1.7496066976732307,
"grad_norm": 0.00014763257058802992,
"learning_rate": 1.1462462654552685e-06,
"loss": 0.0,
"step": 1096000
},
{
"epoch": 1.75040487591122,
"grad_norm": 0.0001301927404711023,
"learning_rate": 1.139043871496227e-06,
"loss": 0.0,
"step": 1096500
},
{
"epoch": 1.7512030541492098,
"grad_norm": 0.000194509033462964,
"learning_rate": 1.1318632837405885e-06,
"loss": 0.0,
"step": 1097000
},
{
"epoch": 1.7520012323871994,
"grad_norm": 0.00022325903410091996,
"learning_rate": 1.1247045134849248e-06,
"loss": 0.0,
"step": 1097500
},
{
"epoch": 1.752799410625189,
"grad_norm": 0.00014271271356847137,
"learning_rate": 1.1175675719914924e-06,
"loss": 0.0,
"step": 1098000
},
{
"epoch": 1.7535975888631787,
"grad_norm": 0.00030372265609912574,
"learning_rate": 1.1104524704882014e-06,
"loss": 0.0,
"step": 1098500
},
{
"epoch": 1.7543957671011683,
"grad_norm": 0.00020143936853855848,
"learning_rate": 1.1033592201686093e-06,
"loss": 0.0,
"step": 1099000
},
{
"epoch": 1.7551939453391578,
"grad_norm": 0.00019451904518064111,
"learning_rate": 1.0962878321918884e-06,
"loss": 0.0,
"step": 1099500
},
{
"epoch": 1.7559921235771476,
"grad_norm": 0.0001757531426846981,
"learning_rate": 1.0892383176828213e-06,
"loss": 0.0,
"step": 1100000
},
{
"epoch": 1.7567903018151372,
"grad_norm": 0.0002402666286798194,
"learning_rate": 1.0822106877317834e-06,
"loss": 0.0,
"step": 1100500
},
{
"epoch": 1.7575884800531267,
"grad_norm": 0.0002771165454760194,
"learning_rate": 1.0752049533947122e-06,
"loss": 0.0,
"step": 1101000
},
{
"epoch": 1.7583866582911165,
"grad_norm": 0.00023865242837928236,
"learning_rate": 1.0682211256931051e-06,
"loss": 0.0,
"step": 1101500
},
{
"epoch": 1.7591848365291058,
"grad_norm": 0.000238187174545601,
"learning_rate": 1.0612592156139933e-06,
"loss": 0.0,
"step": 1102000
},
{
"epoch": 1.7599830147670956,
"grad_norm": 0.00048370350850746036,
"learning_rate": 1.0543192341099306e-06,
"loss": 0.0,
"step": 1102500
},
{
"epoch": 1.7607811930050852,
"grad_norm": 0.00022986202384345233,
"learning_rate": 1.0474011920989667e-06,
"loss": 0.0,
"step": 1103000
},
{
"epoch": 1.7615793712430747,
"grad_norm": 0.0001623473799554631,
"learning_rate": 1.0405051004646377e-06,
"loss": 0.0,
"step": 1103500
},
{
"epoch": 1.7623775494810645,
"grad_norm": 0.0001603475830052048,
"learning_rate": 1.0336309700559531e-06,
"loss": 0.0,
"step": 1104000
},
{
"epoch": 1.763175727719054,
"grad_norm": 0.00022371606610249728,
"learning_rate": 1.0267788116873628e-06,
"loss": 0.0,
"step": 1104500
},
{
"epoch": 1.7639739059570436,
"grad_norm": 0.00020552480418700725,
"learning_rate": 1.0199486361387567e-06,
"loss": 0.0,
"step": 1105000
},
{
"epoch": 1.7647720841950334,
"grad_norm": 0.0001210166301461868,
"learning_rate": 1.0131404541554412e-06,
"loss": 0.0,
"step": 1105500
},
{
"epoch": 1.7655702624330227,
"grad_norm": 0.00022290610650088638,
"learning_rate": 1.0063542764481204e-06,
"loss": 0.0,
"step": 1106000
},
{
"epoch": 1.7663684406710125,
"grad_norm": 0.0002481382107362151,
"learning_rate": 9.995901136928776e-07,
"loss": 0.0,
"step": 1106500
},
{
"epoch": 1.767166618909002,
"grad_norm": 0.0002452095504850149,
"learning_rate": 9.928479765311689e-07,
"loss": 0.0,
"step": 1107000
},
{
"epoch": 1.7679647971469916,
"grad_norm": 0.00017975401715375483,
"learning_rate": 9.86127875569796e-07,
"loss": 0.0,
"step": 1107500
},
{
"epoch": 1.7687629753849814,
"grad_norm": 0.0002084321022266522,
"learning_rate": 9.794298213808912e-07,
"loss": 0.0,
"step": 1108000
},
{
"epoch": 1.769561153622971,
"grad_norm": 0.000212875209399499,
"learning_rate": 9.727538245019047e-07,
"loss": 0.0,
"step": 1108500
},
{
"epoch": 1.7703593318609605,
"grad_norm": 0.00023228510690387338,
"learning_rate": 9.66099895435587e-07,
"loss": 0.0,
"step": 1109000
},
{
"epoch": 1.7711575100989503,
"grad_norm": 0.0001471398863941431,
"learning_rate": 9.594680446499716e-07,
"loss": 0.0,
"step": 1109500
},
{
"epoch": 1.7719556883369396,
"grad_norm": 0.00029612609068863094,
"learning_rate": 9.528582825783505e-07,
"loss": 0.0,
"step": 1110000
},
{
"epoch": 1.7727538665749294,
"grad_norm": 0.00016415739082731307,
"learning_rate": 9.462706196192777e-07,
"loss": 0.0,
"step": 1110500
},
{
"epoch": 1.773552044812919,
"grad_norm": 0.00027014673105441034,
"learning_rate": 9.397050661365348e-07,
"loss": 0.0,
"step": 1111000
},
{
"epoch": 1.7743502230509085,
"grad_norm": 0.0001431436976417899,
"learning_rate": 9.331616324591142e-07,
"loss": 0.0,
"step": 1111500
},
{
"epoch": 1.7751484012888983,
"grad_norm": 0.000254760350799188,
"learning_rate": 9.266403288812197e-07,
"loss": 0.0,
"step": 1112000
},
{
"epoch": 1.7759465795268878,
"grad_norm": 0.0002271405392093584,
"learning_rate": 9.201411656622333e-07,
"loss": 0.0,
"step": 1112500
},
{
"epoch": 1.7767447577648774,
"grad_norm": 0.0002613988472148776,
"learning_rate": 9.136641530267126e-07,
"loss": 0.0,
"step": 1113000
},
{
"epoch": 1.7775429360028672,
"grad_norm": 0.000750661245547235,
"learning_rate": 9.072093011643567e-07,
"loss": 0.0,
"step": 1113500
},
{
"epoch": 1.7783411142408567,
"grad_norm": 0.00017439897055737674,
"learning_rate": 9.007766202300094e-07,
"loss": 0.0,
"step": 1114000
},
{
"epoch": 1.7791392924788463,
"grad_norm": 0.0002634642878547311,
"learning_rate": 8.943661203436337e-07,
"loss": 0.0,
"step": 1114500
},
{
"epoch": 1.779937470716836,
"grad_norm": 0.00016087290714494884,
"learning_rate": 8.879778115902942e-07,
"loss": 0.0,
"step": 1115000
},
{
"epoch": 1.7807356489548254,
"grad_norm": 0.00019531312864273787,
"learning_rate": 8.816117040201449e-07,
"loss": 0.0,
"step": 1115500
},
{
"epoch": 1.7815338271928152,
"grad_norm": 0.00020467853755690157,
"learning_rate": 8.752678076484194e-07,
"loss": 0.0,
"step": 1116000
},
{
"epoch": 1.7823320054308047,
"grad_norm": 0.00017259104060940444,
"learning_rate": 8.689461324553976e-07,
"loss": 0.0,
"step": 1116500
},
{
"epoch": 1.7831301836687943,
"grad_norm": 0.0002617633144836873,
"learning_rate": 8.626466883864093e-07,
"loss": 0.0,
"step": 1117000
},
{
"epoch": 1.783928361906784,
"grad_norm": 0.00016495882300660014,
"learning_rate": 8.563694853518017e-07,
"loss": 0.0,
"step": 1117500
},
{
"epoch": 1.7847265401447736,
"grad_norm": 0.00015124342462513596,
"learning_rate": 8.501145332269439e-07,
"loss": 0.0,
"step": 1118000
},
{
"epoch": 1.7855247183827632,
"grad_norm": 0.0003004848840646446,
"learning_rate": 8.438818418521893e-07,
"loss": 0.0,
"step": 1118500
},
{
"epoch": 1.786322896620753,
"grad_norm": 0.00018394803919363767,
"learning_rate": 8.376714210328728e-07,
"loss": 0.0,
"step": 1119000
},
{
"epoch": 1.7871210748587423,
"grad_norm": 0.00023628614144399762,
"learning_rate": 8.314832805393008e-07,
"loss": 0.0,
"step": 1119500
},
{
"epoch": 1.787919253096732,
"grad_norm": 0.00016657341620884836,
"learning_rate": 8.253174301067141e-07,
"loss": 0.0,
"step": 1120000
},
{
"epoch": 1.7887174313347216,
"grad_norm": 0.00014997956168372184,
"learning_rate": 8.191738794352999e-07,
"loss": 0.0,
"step": 1120500
},
{
"epoch": 1.7895156095727112,
"grad_norm": 0.0047565544955432415,
"learning_rate": 8.130526381901488e-07,
"loss": 0.0,
"step": 1121000
},
{
"epoch": 1.790313787810701,
"grad_norm": 0.00021316143102012575,
"learning_rate": 8.069537160012741e-07,
"loss": 0.0,
"step": 1121500
},
{
"epoch": 1.7911119660486905,
"grad_norm": 0.00012046356278005987,
"learning_rate": 8.008771224635575e-07,
"loss": 0.0,
"step": 1122000
},
{
"epoch": 1.79191014428668,
"grad_norm": 0.010202116332948208,
"learning_rate": 7.948228671367653e-07,
"loss": 0.0,
"step": 1122500
},
{
"epoch": 1.7927083225246698,
"grad_norm": 0.00019842854817397892,
"learning_rate": 7.887909595455101e-07,
"loss": 0.0,
"step": 1123000
},
{
"epoch": 1.7935065007626592,
"grad_norm": 0.0003019568102899939,
"learning_rate": 7.827814091792546e-07,
"loss": 0.0,
"step": 1123500
},
{
"epoch": 1.794304679000649,
"grad_norm": 0.00022996992629487067,
"learning_rate": 7.767942254922927e-07,
"loss": 0.0,
"step": 1124000
},
{
"epoch": 1.7951028572386385,
"grad_norm": 0.00019457354210317135,
"learning_rate": 7.708294179037184e-07,
"loss": 0.0,
"step": 1124500
},
{
"epoch": 1.795901035476628,
"grad_norm": 0.00023033515026327223,
"learning_rate": 7.648869957974353e-07,
"loss": 0.0,
"step": 1125000
},
{
"epoch": 1.7966992137146178,
"grad_norm": 0.00018613325664773583,
"learning_rate": 7.589669685221251e-07,
"loss": 0.0,
"step": 1125500
},
{
"epoch": 1.7974973919526074,
"grad_norm": 0.00013318394485395402,
"learning_rate": 7.53069345391233e-07,
"loss": 0.0,
"step": 1126000
},
{
"epoch": 1.798295570190597,
"grad_norm": 0.00020175697864033282,
"learning_rate": 7.471941356829653e-07,
"loss": 0.0,
"step": 1126500
},
{
"epoch": 1.7990937484285867,
"grad_norm": 0.00015706892008893192,
"learning_rate": 7.413413486402637e-07,
"loss": 0.0,
"step": 1127000
},
{
"epoch": 1.799891926666576,
"grad_norm": 0.0005745620583184063,
"learning_rate": 7.35510993470796e-07,
"loss": 0.0,
"step": 1127500
},
{
"epoch": 1.8006901049045658,
"grad_norm": 0.00015678079216741025,
"learning_rate": 7.297030793469367e-07,
"loss": 0.0,
"step": 1128000
},
{
"epoch": 1.8014882831425554,
"grad_norm": 0.0001835569564718753,
"learning_rate": 7.239176154057619e-07,
"loss": 0.0,
"step": 1128500
},
{
"epoch": 1.802286461380545,
"grad_norm": 0.00017835032485891134,
"learning_rate": 7.181546107490228e-07,
"loss": 0.0,
"step": 1129000
},
{
"epoch": 1.8030846396185347,
"grad_norm": 0.00019548686395864934,
"learning_rate": 7.124140744431368e-07,
"loss": 0.0,
"step": 1129500
},
{
"epoch": 1.8038828178565243,
"grad_norm": 0.00024722403031773865,
"learning_rate": 7.066960155191781e-07,
"loss": 0.0,
"step": 1130000
},
{
"epoch": 1.8046809960945138,
"grad_norm": 0.00014821036893408746,
"learning_rate": 7.010004429728611e-07,
"loss": 0.0,
"step": 1130500
},
{
"epoch": 1.8054791743325036,
"grad_norm": 0.0010676577221602201,
"learning_rate": 6.953273657645182e-07,
"loss": 0.0,
"step": 1131000
},
{
"epoch": 1.8062773525704932,
"grad_norm": 0.00027099967701360583,
"learning_rate": 6.89676792819095e-07,
"loss": 0.0,
"step": 1131500
},
{
"epoch": 1.8070755308084827,
"grad_norm": 0.0002371660084463656,
"learning_rate": 6.840487330261308e-07,
"loss": 0.0,
"step": 1132000
},
{
"epoch": 1.8078737090464725,
"grad_norm": 0.0002728116814978421,
"learning_rate": 6.784431952397546e-07,
"loss": 0.0,
"step": 1132500
},
{
"epoch": 1.8086718872844618,
"grad_norm": 0.00018127913062926382,
"learning_rate": 6.728601882786523e-07,
"loss": 0.0,
"step": 1133000
},
{
"epoch": 1.8094700655224516,
"grad_norm": 0.00026493624318391085,
"learning_rate": 6.672997209260712e-07,
"loss": 0.0,
"step": 1133500
},
{
"epoch": 1.8102682437604412,
"grad_norm": 1832.54248046875,
"learning_rate": 6.617618019298005e-07,
"loss": 0.0,
"step": 1134000
},
{
"epoch": 1.8110664219984307,
"grad_norm": 0.0001290196378249675,
"learning_rate": 6.562464400021512e-07,
"loss": 0.0,
"step": 1134500
},
{
"epoch": 1.8118646002364205,
"grad_norm": 0.00015263666864484549,
"learning_rate": 6.507536438199474e-07,
"loss": 0.0,
"step": 1135000
},
{
"epoch": 1.81266277847441,
"grad_norm": 0.00016473012510687113,
"learning_rate": 6.452834220245168e-07,
"loss": 0.0,
"step": 1135500
},
{
"epoch": 1.8134609567123996,
"grad_norm": 0.0001999867963604629,
"learning_rate": 6.398357832216705e-07,
"loss": 0.0,
"step": 1136000
},
{
"epoch": 1.8142591349503894,
"grad_norm": 0.016133194789290428,
"learning_rate": 6.344107359816898e-07,
"loss": 0.0,
"step": 1136500
},
{
"epoch": 1.8150573131883787,
"grad_norm": 0.00015022288425825536,
"learning_rate": 6.290082888393172e-07,
"loss": 0.0,
"step": 1137000
},
{
"epoch": 1.8158554914263685,
"grad_norm": 0.00015219298074953258,
"learning_rate": 6.236284502937428e-07,
"loss": 0.0,
"step": 1137500
},
{
"epoch": 1.816653669664358,
"grad_norm": 0.00019694263755809516,
"learning_rate": 6.182712288085828e-07,
"loss": 0.0,
"step": 1138000
},
{
"epoch": 1.8174518479023476,
"grad_norm": 0.00015368903405033052,
"learning_rate": 6.129366328118758e-07,
"loss": 0.0,
"step": 1138500
},
{
"epoch": 1.8182500261403374,
"grad_norm": 0.0006411715294234455,
"learning_rate": 6.076246706960631e-07,
"loss": 0.0,
"step": 1139000
},
{
"epoch": 1.819048204378327,
"grad_norm": 0.00017495772044640034,
"learning_rate": 6.023353508179835e-07,
"loss": 0.0,
"step": 1139500
},
{
"epoch": 1.8198463826163165,
"grad_norm": 0.00028414788539521396,
"learning_rate": 5.970686814988474e-07,
"loss": 0.0,
"step": 1140000
},
{
"epoch": 1.8206445608543063,
"grad_norm": 0.00014593903324566782,
"learning_rate": 5.91824671024237e-07,
"loss": 0.0,
"step": 1140500
},
{
"epoch": 1.8214427390922956,
"grad_norm": 0.00016339162539225072,
"learning_rate": 5.866033276440863e-07,
"loss": 0.0,
"step": 1141000
},
{
"epoch": 1.8222409173302854,
"grad_norm": 0.00025076873134821653,
"learning_rate": 5.814046595726663e-07,
"loss": 0.0,
"step": 1141500
},
{
"epoch": 1.823039095568275,
"grad_norm": 0.00019486478413455188,
"learning_rate": 5.762286749885781e-07,
"loss": 0.0,
"step": 1142000
},
{
"epoch": 1.8238372738062645,
"grad_norm": 0.00026214728131890297,
"learning_rate": 5.710753820347331e-07,
"loss": 0.0,
"step": 1142500
},
{
"epoch": 1.8246354520442543,
"grad_norm": 0.000143070996273309,
"learning_rate": 5.659447888183511e-07,
"loss": 0.0,
"step": 1143000
},
{
"epoch": 1.8254336302822438,
"grad_norm": 0.00029296561842784286,
"learning_rate": 5.608369034109306e-07,
"loss": 0.0,
"step": 1143500
},
{
"epoch": 1.8262318085202334,
"grad_norm": 0.0001531924499431625,
"learning_rate": 5.557517338482537e-07,
"loss": 0.0,
"step": 1144000
},
{
"epoch": 1.8270299867582231,
"grad_norm": 0.000254272687016055,
"learning_rate": 5.50689288130366e-07,
"loss": 0.0,
"step": 1144500
},
{
"epoch": 1.8278281649962125,
"grad_norm": 0.00014179408026393503,
"learning_rate": 5.456495742215551e-07,
"loss": 0.0,
"step": 1145000
},
{
"epoch": 1.8286263432342023,
"grad_norm": 0.00011709563841577619,
"learning_rate": 5.406326000503553e-07,
"loss": 0.0,
"step": 1145500
},
{
"epoch": 1.829424521472192,
"grad_norm": 0.0021168107632547617,
"learning_rate": 5.356383735095249e-07,
"loss": 0.0,
"step": 1146000
},
{
"epoch": 1.8302226997101814,
"grad_norm": 0.00023661217710468918,
"learning_rate": 5.30666902456034e-07,
"loss": 0.0,
"step": 1146500
},
{
"epoch": 1.8310208779481711,
"grad_norm": 1002.8739624023438,
"learning_rate": 5.257181947110512e-07,
"loss": 0.0,
"step": 1147000
},
{
"epoch": 1.8318190561861607,
"grad_norm": 0.0001538294309284538,
"learning_rate": 5.207922580599356e-07,
"loss": 0.0,
"step": 1147500
},
{
"epoch": 1.8326172344241503,
"grad_norm": 0.00015853659715503454,
"learning_rate": 5.158891002522282e-07,
"loss": 0.0,
"step": 1148000
},
{
"epoch": 1.83341541266214,
"grad_norm": 0.0006100303144194186,
"learning_rate": 5.110087290016252e-07,
"loss": 0.0,
"step": 1148500
},
{
"epoch": 1.8342135909001296,
"grad_norm": 0.00016067329852376133,
"learning_rate": 5.061511519859785e-07,
"loss": 0.0,
"step": 1149000
},
{
"epoch": 1.8350117691381191,
"grad_norm": 0.00024289886641781777,
"learning_rate": 5.013163768472783e-07,
"loss": 0.0,
"step": 1149500
},
{
"epoch": 1.835809947376109,
"grad_norm": 0.00019153668836224824,
"learning_rate": 4.965044111916489e-07,
"loss": 0.0,
"step": 1150000
},
{
"epoch": 1.8366081256140983,
"grad_norm": 0.00018519822333473712,
"learning_rate": 4.917152625893212e-07,
"loss": 0.0,
"step": 1150500
},
{
"epoch": 1.837406303852088,
"grad_norm": 0.0001868938561528921,
"learning_rate": 4.869489385746367e-07,
"loss": 0.0,
"step": 1151000
},
{
"epoch": 1.8382044820900776,
"grad_norm": 0.000234300852753222,
"learning_rate": 4.82205446646029e-07,
"loss": 0.0,
"step": 1151500
},
{
"epoch": 1.8390026603280671,
"grad_norm": 0.00017578886763658375,
"learning_rate": 4.774847942660055e-07,
"loss": 0.0,
"step": 1152000
},
{
"epoch": 1.839800838566057,
"grad_norm": 0.00021545674826484174,
"learning_rate": 4.727869888611519e-07,
"loss": 0.0,
"step": 1152500
},
{
"epoch": 1.8405990168040465,
"grad_norm": 0.0005743197398260236,
"learning_rate": 4.681120378220982e-07,
"loss": 0.0,
"step": 1153000
},
{
"epoch": 1.841397195042036,
"grad_norm": 0.00012018709094263613,
"learning_rate": 4.634599485035346e-07,
"loss": 0.0,
"step": 1153500
},
{
"epoch": 1.8421953732800258,
"grad_norm": 0.00021630495029967278,
"learning_rate": 4.588307282241749e-07,
"loss": 0.0,
"step": 1154000
},
{
"epoch": 1.8429935515180151,
"grad_norm": 0.0001780359452823177,
"learning_rate": 4.542243842667537e-07,
"loss": 0.0,
"step": 1154500
},
{
"epoch": 1.843791729756005,
"grad_norm": 0.00031716233934275806,
"learning_rate": 4.496409238780258e-07,
"loss": 0.0,
"step": 1155000
},
{
"epoch": 1.8445899079939945,
"grad_norm": 0.0009638772462494671,
"learning_rate": 4.450803542687365e-07,
"loss": 0.0,
"step": 1155500
},
{
"epoch": 1.845388086231984,
"grad_norm": 0.00016452405543532223,
"learning_rate": 4.4054268261362496e-07,
"loss": 0.0,
"step": 1156000
},
{
"epoch": 1.8461862644699738,
"grad_norm": 0.0001442407228751108,
"learning_rate": 4.360279160514008e-07,
"loss": 0.0,
"step": 1156500
},
{
"epoch": 1.8469844427079634,
"grad_norm": 0.00014954974176362157,
"learning_rate": 4.315360616847458e-07,
"loss": 0.0,
"step": 1157000
},
{
"epoch": 1.847782620945953,
"grad_norm": 0.00043269319576211274,
"learning_rate": 4.2706712658029224e-07,
"loss": 0.0,
"step": 1157500
},
{
"epoch": 1.8485807991839427,
"grad_norm": 0.00015773314225953072,
"learning_rate": 4.226211177686162e-07,
"loss": 0.0,
"step": 1158000
},
{
"epoch": 1.849378977421932,
"grad_norm": 0.0002262149064335972,
"learning_rate": 4.1819804224422444e-07,
"loss": 0.0,
"step": 1158500
},
{
"epoch": 1.8501771556599218,
"grad_norm": 0.00019894444267265499,
"learning_rate": 4.137979069655473e-07,
"loss": 0.0,
"step": 1159000
},
{
"epoch": 1.8509753338979114,
"grad_norm": 0.0001444010267732665,
"learning_rate": 4.0942071885492595e-07,
"loss": 0.0,
"step": 1159500
},
{
"epoch": 1.851773512135901,
"grad_norm": 0.00019132070883642882,
"learning_rate": 4.0506648479859523e-07,
"loss": 0.0,
"step": 1160000
},
{
"epoch": 1.8525716903738907,
"grad_norm": 0.00022376253036782146,
"learning_rate": 4.007352116466889e-07,
"loss": 0.0,
"step": 1160500
},
{
"epoch": 1.8533698686118802,
"grad_norm": 0.00021582655608654022,
"learning_rate": 3.964269062132081e-07,
"loss": 0.0,
"step": 1161000
},
{
"epoch": 1.8541680468498698,
"grad_norm": 0.00011844315304188058,
"learning_rate": 3.921415752760227e-07,
"loss": 0.0,
"step": 1161500
},
{
"epoch": 1.8549662250878596,
"grad_norm": 0.000183063093572855,
"learning_rate": 3.878792255768615e-07,
"loss": 0.0,
"step": 1162000
},
{
"epoch": 1.8557644033258491,
"grad_norm": 0.0001891221763798967,
"learning_rate": 3.8363986382130066e-07,
"loss": 0.0,
"step": 1162500
},
{
"epoch": 1.8565625815638387,
"grad_norm": 0.0009782819543033838,
"learning_rate": 3.794234966787502e-07,
"loss": 0.0,
"step": 1163000
},
{
"epoch": 1.8573607598018285,
"grad_norm": 0.00015358542441390455,
"learning_rate": 3.7523013078243906e-07,
"loss": 0.0,
"step": 1163500
},
{
"epoch": 1.8581589380398178,
"grad_norm": 0.002302468754351139,
"learning_rate": 3.710597727294185e-07,
"loss": 0.0,
"step": 1164000
},
{
"epoch": 1.8589571162778076,
"grad_norm": 0.00019968704145867378,
"learning_rate": 3.6691242908054036e-07,
"loss": 0.0,
"step": 1164500
},
{
"epoch": 1.8597552945157971,
"grad_norm": 0.00015924213221296668,
"learning_rate": 3.6278810636044713e-07,
"loss": 0.0,
"step": 1165000
},
{
"epoch": 1.8605534727537867,
"grad_norm": 0.0002347809204366058,
"learning_rate": 3.586868110575686e-07,
"loss": 0.0,
"step": 1165500
},
{
"epoch": 1.8613516509917765,
"grad_norm": 0.00016319741553161293,
"learning_rate": 3.5460854962410526e-07,
"loss": 0.0,
"step": 1166000
},
{
"epoch": 1.862149829229766,
"grad_norm": 0.002569986740127206,
"learning_rate": 3.505533284760232e-07,
"loss": 0.0,
"step": 1166500
},
{
"epoch": 1.8629480074677556,
"grad_norm": 0.00019444114877842367,
"learning_rate": 3.465211539930374e-07,
"loss": 0.0,
"step": 1167000
},
{
"epoch": 1.8637461857057454,
"grad_norm": 0.00016536629118490964,
"learning_rate": 3.4251203251860876e-07,
"loss": 0.0,
"step": 1167500
},
{
"epoch": 1.8645443639437347,
"grad_norm": 0.00015523859474342316,
"learning_rate": 3.385259703599303e-07,
"loss": 0.0,
"step": 1168000
},
{
"epoch": 1.8653425421817245,
"grad_norm": 0.0039033798966556787,
"learning_rate": 3.345629737879158e-07,
"loss": 0.0,
"step": 1168500
},
{
"epoch": 1.866140720419714,
"grad_norm": 0.0003161656204611063,
"learning_rate": 3.306230490371931e-07,
"loss": 0.0,
"step": 1169000
},
{
"epoch": 1.8669388986577036,
"grad_norm": 0.0012187734246253967,
"learning_rate": 3.267062023060957e-07,
"loss": 0.0,
"step": 1169500
},
{
"epoch": 1.8677370768956933,
"grad_norm": 0.00017112820933107287,
"learning_rate": 3.228124397566479e-07,
"loss": 0.0,
"step": 1170000
},
{
"epoch": 1.868535255133683,
"grad_norm": 0.00031683017732575536,
"learning_rate": 3.189417675145578e-07,
"loss": 0.0,
"step": 1170500
},
{
"epoch": 1.8693334333716725,
"grad_norm": 0.000180508301127702,
"learning_rate": 3.1509419166920797e-07,
"loss": 0.0,
"step": 1171000
},
{
"epoch": 1.8701316116096622,
"grad_norm": 1.3118445873260498,
"learning_rate": 3.1126971827364627e-07,
"loss": 0.0,
"step": 1171500
},
{
"epoch": 1.8709297898476516,
"grad_norm": 0.00019488642283249646,
"learning_rate": 3.074683533445749e-07,
"loss": 0.0,
"step": 1172000
},
{
"epoch": 1.8717279680856413,
"grad_norm": 0.0001434733421774581,
"learning_rate": 3.036901028623401e-07,
"loss": 0.0,
"step": 1172500
},
{
"epoch": 1.872526146323631,
"grad_norm": 0.0001922912779264152,
"learning_rate": 2.999349727709272e-07,
"loss": 0.0,
"step": 1173000
},
{
"epoch": 1.8733243245616205,
"grad_norm": 0.00012995305587537587,
"learning_rate": 2.9620296897794553e-07,
"loss": 0.0,
"step": 1173500
},
{
"epoch": 1.8741225027996102,
"grad_norm": 0.0002586755435913801,
"learning_rate": 2.924940973546236e-07,
"loss": 0.0,
"step": 1174000
},
{
"epoch": 1.8749206810375998,
"grad_norm": 0.0007729693315923214,
"learning_rate": 2.888083637357991e-07,
"loss": 0.0,
"step": 1174500
},
{
"epoch": 1.8757188592755893,
"grad_norm": 0.00014659270527772605,
"learning_rate": 2.8514577391990536e-07,
"loss": 0.0,
"step": 1175000
},
{
"epoch": 1.8765170375135791,
"grad_norm": 0.00019715832604561,
"learning_rate": 2.815063336689683e-07,
"loss": 0.0,
"step": 1175500
},
{
"epoch": 1.8773152157515685,
"grad_norm": 0.0001621273549972102,
"learning_rate": 2.778900487085945e-07,
"loss": 0.0,
"step": 1176000
},
{
"epoch": 1.8781133939895582,
"grad_norm": 0.00016097325715236366,
"learning_rate": 2.742969247279614e-07,
"loss": 0.0,
"step": 1176500
},
{
"epoch": 1.878911572227548,
"grad_norm": 0.0001756875280989334,
"learning_rate": 2.707269673798074e-07,
"loss": 0.0,
"step": 1177000
},
{
"epoch": 1.8797097504655373,
"grad_norm": 0.00022921212075743824,
"learning_rate": 2.671801822804315e-07,
"loss": 0.0,
"step": 1177500
},
{
"epoch": 1.8805079287035271,
"grad_norm": 0.0003720010572578758,
"learning_rate": 2.63656575009672e-07,
"loss": 0.0,
"step": 1178000
},
{
"epoch": 1.8813061069415167,
"grad_norm": 0.00016360900190193206,
"learning_rate": 2.6015615111090465e-07,
"loss": 0.0,
"step": 1178500
},
{
"epoch": 1.8821042851795062,
"grad_norm": 0.00013911504356656224,
"learning_rate": 2.566789160910343e-07,
"loss": 0.0,
"step": 1179000
},
{
"epoch": 1.882902463417496,
"grad_norm": 0.00016801197489257902,
"learning_rate": 2.532248754204819e-07,
"loss": 0.0,
"step": 1179500
},
{
"epoch": 1.8837006416554856,
"grad_norm": 0.00014275613648351282,
"learning_rate": 2.497940345331856e-07,
"loss": 0.0,
"step": 1180000
},
{
"epoch": 1.8844988198934751,
"grad_norm": 0.00018284647376276553,
"learning_rate": 2.4638639882657634e-07,
"loss": 0.0,
"step": 1180500
},
{
"epoch": 1.885296998131465,
"grad_norm": 0.0001408082462148741,
"learning_rate": 2.430019736615824e-07,
"loss": 0.0,
"step": 1181000
},
{
"epoch": 1.8860951763694542,
"grad_norm": 0.00038785370998084545,
"learning_rate": 2.396407643626214e-07,
"loss": 0.0,
"step": 1181500
},
{
"epoch": 1.886893354607444,
"grad_norm": 0.000422690442064777,
"learning_rate": 2.3630277621758178e-07,
"loss": 0.0,
"step": 1182000
},
{
"epoch": 1.8876915328454336,
"grad_norm": 0.00020551889610942453,
"learning_rate": 2.3298801447782126e-07,
"loss": 0.0,
"step": 1182500
},
{
"epoch": 1.8884897110834231,
"grad_norm": 0.00021077659039292485,
"learning_rate": 2.2969648435815671e-07,
"loss": 0.0,
"step": 1183000
},
{
"epoch": 1.889287889321413,
"grad_norm": 0.00023901699751149863,
"learning_rate": 2.2642819103686263e-07,
"loss": 0.0,
"step": 1183500
},
{
"epoch": 1.8900860675594024,
"grad_norm": 0.00018016165995504707,
"learning_rate": 2.2318313965564773e-07,
"loss": 0.0,
"step": 1184000
},
{
"epoch": 1.890884245797392,
"grad_norm": 0.00011153416562592611,
"learning_rate": 2.1996133531966668e-07,
"loss": 0.0,
"step": 1184500
},
{
"epoch": 1.8916824240353818,
"grad_norm": 0.000145767378853634,
"learning_rate": 2.1676278309748997e-07,
"loss": 0.0,
"step": 1185000
},
{
"epoch": 1.8924806022733711,
"grad_norm": 0.00012787348532583565,
"learning_rate": 2.1358748802111917e-07,
"loss": 0.0,
"step": 1185500
},
{
"epoch": 1.893278780511361,
"grad_norm": 0.0002693917485885322,
"learning_rate": 2.1043545508596162e-07,
"loss": 0.0,
"step": 1186000
},
{
"epoch": 1.8940769587493504,
"grad_norm": 0.00017825972463469952,
"learning_rate": 2.0730668925082575e-07,
"loss": 0.0,
"step": 1186500
},
{
"epoch": 1.89487513698734,
"grad_norm": 0.0002469649480190128,
"learning_rate": 2.0420119543792248e-07,
"loss": 0.0,
"step": 1187000
},
{
"epoch": 1.8956733152253298,
"grad_norm": 0.027682464569807053,
"learning_rate": 2.0111897853284544e-07,
"loss": 0.0,
"step": 1187500
},
{
"epoch": 1.8964714934633193,
"grad_norm": 0.0001725145266391337,
"learning_rate": 1.980600433845725e-07,
"loss": 0.0,
"step": 1188000
},
{
"epoch": 1.897269671701309,
"grad_norm": 0.00032132607884705067,
"learning_rate": 1.9502439480545087e-07,
"loss": 0.0,
"step": 1188500
},
{
"epoch": 1.8980678499392987,
"grad_norm": 0.00022941759380046278,
"learning_rate": 1.9201203757120034e-07,
"loss": 0.0,
"step": 1189000
},
{
"epoch": 1.898866028177288,
"grad_norm": 0.00014870801533106714,
"learning_rate": 1.8902297642088673e-07,
"loss": 0.0,
"step": 1189500
},
{
"epoch": 1.8996642064152778,
"grad_norm": 0.0001499906793469563,
"learning_rate": 1.8605721605693515e-07,
"loss": 0.0,
"step": 1190000
},
{
"epoch": 1.9004623846532673,
"grad_norm": 0.00012592818529810756,
"learning_rate": 1.8311476114511505e-07,
"loss": 0.0,
"step": 1190500
},
{
"epoch": 1.9012605628912569,
"grad_norm": 0.00014124861627351493,
"learning_rate": 1.8019561631452185e-07,
"loss": 0.0,
"step": 1191000
},
{
"epoch": 1.9020587411292467,
"grad_norm": 0.0001274181850021705,
"learning_rate": 1.7729978615758868e-07,
"loss": 0.0,
"step": 1191500
},
{
"epoch": 1.9028569193672362,
"grad_norm": 0.0002295897575095296,
"learning_rate": 1.744272752300663e-07,
"loss": 0.0,
"step": 1192000
},
{
"epoch": 1.9036550976052258,
"grad_norm": 0.00015409973275382072,
"learning_rate": 1.7157808805101648e-07,
"loss": 0.0,
"step": 1192500
},
{
"epoch": 1.9044532758432156,
"grad_norm": 0.0002241683832835406,
"learning_rate": 1.687522291028154e-07,
"loss": 0.0,
"step": 1193000
},
{
"epoch": 1.905251454081205,
"grad_norm": 0.0004630073963198811,
"learning_rate": 1.659497028311302e-07,
"loss": 0.0,
"step": 1193500
},
{
"epoch": 1.9060496323191947,
"grad_norm": 0.00024872427457012236,
"learning_rate": 1.631705136449274e-07,
"loss": 0.0,
"step": 1194000
},
{
"epoch": 1.9068478105571844,
"grad_norm": 0.0001354358100797981,
"learning_rate": 1.6041466591645627e-07,
"loss": 0.0,
"step": 1194500
},
{
"epoch": 1.9076459887951738,
"grad_norm": 0.00018827700114343315,
"learning_rate": 1.5768216398124535e-07,
"loss": 0.0,
"step": 1195000
},
{
"epoch": 1.9084441670331636,
"grad_norm": 0.0002568909549154341,
"learning_rate": 1.54973012138096e-07,
"loss": 0.0,
"step": 1195500
},
{
"epoch": 1.909242345271153,
"grad_norm": 0.00014703450142405927,
"learning_rate": 1.522872146490739e-07,
"loss": 0.0,
"step": 1196000
},
{
"epoch": 1.9100405235091427,
"grad_norm": 0.0002241420588688925,
"learning_rate": 1.496247757395075e-07,
"loss": 0.0,
"step": 1196500
},
{
"epoch": 1.9108387017471324,
"grad_norm": 0.00019378996512386948,
"learning_rate": 1.469856995979696e-07,
"loss": 0.0,
"step": 1197000
},
{
"epoch": 1.911636879985122,
"grad_norm": 0.0004964235122315586,
"learning_rate": 1.4436999037628407e-07,
"loss": 0.0,
"step": 1197500
},
{
"epoch": 1.9124350582231116,
"grad_norm": 0.0001157882870757021,
"learning_rate": 1.4177765218951422e-07,
"loss": 0.0,
"step": 1198000
},
{
"epoch": 1.9132332364611013,
"grad_norm": 0.0001476890465710312,
"learning_rate": 1.3920868911595275e-07,
"loss": 0.0,
"step": 1198500
},
{
"epoch": 1.9140314146990907,
"grad_norm": 0.0002459689858369529,
"learning_rate": 1.3666310519711843e-07,
"loss": 0.0,
"step": 1199000
},
{
"epoch": 1.9148295929370804,
"grad_norm": 0.0001660026318859309,
"learning_rate": 1.3414090443775285e-07,
"loss": 0.0,
"step": 1199500
},
{
"epoch": 1.91562777117507,
"grad_norm": 0.0002628338697832078,
"learning_rate": 1.3164209080581025e-07,
"loss": 0.0,
"step": 1200000
},
{
"epoch": 1.91562777117507,
"eval_loss": 1.6006262740120292e-05,
"eval_runtime": 22149.8885,
"eval_samples_per_second": 100.556,
"eval_steps_per_second": 3.142,
"step": 1200000
}
],
"logging_steps": 500,
"max_steps": 1252852,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 150000,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.055996293644091e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}