| { |
| "best_metric": 1.1492022167658433e-05, |
| "best_model_checkpoint": "./ByT5/results_bangla_byt5_full_512/checkpoint-750000", |
| "epoch": 1.91562777117507, |
| "eval_steps": 150000, |
| "global_step": 1200000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0007981782379896125, |
| "grad_norm": 1.0358465909957886, |
| "learning_rate": 3e-05, |
| "loss": 0.0, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.001596356475979225, |
| "grad_norm": 0.3616809546947479, |
| "learning_rate": 2.999998820092034e-05, |
| "loss": 0.0, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.0023945347139688374, |
| "grad_norm": 0.09030964225530624, |
| "learning_rate": 2.999995280369993e-05, |
| "loss": 0.0, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.00319271295195845, |
| "grad_norm": 0.7502796649932861, |
| "learning_rate": 2.9999893808394453e-05, |
| "loss": 0.0, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.003990891189948062, |
| "grad_norm": 407.71380615234375, |
| "learning_rate": 2.9999811215096716e-05, |
| "loss": 0.0, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.004789069427937675, |
| "grad_norm": 0.033743228763341904, |
| "learning_rate": 2.9999705023936664e-05, |
| "loss": 0.0, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.005587247665927287, |
| "grad_norm": 62.31935119628906, |
| "learning_rate": 2.999957523508135e-05, |
| "loss": 0.0, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.0063854259039169, |
| "grad_norm": 0.005816894117742777, |
| "learning_rate": 2.9999421848734972e-05, |
| "loss": 0.0, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.007183604141906512, |
| "grad_norm": 0.01169545203447342, |
| "learning_rate": 2.9999244865138825e-05, |
| "loss": 0.0, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.007981782379896125, |
| "grad_norm": 0.1192362904548645, |
| "learning_rate": 2.999904428457135e-05, |
| "loss": 0.0, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.008779960617885737, |
| "grad_norm": 0.03755159303545952, |
| "learning_rate": 2.99988201073481e-05, |
| "loss": 0.0, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.00957813885587535, |
| "grad_norm": 0.006901148706674576, |
| "learning_rate": 2.999857233382175e-05, |
| "loss": 0.0, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.010376317093864962, |
| "grad_norm": 0.005667822901159525, |
| "learning_rate": 2.9998300964382105e-05, |
| "loss": 0.0, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.011174495331854575, |
| "grad_norm": 0.009163002483546734, |
| "learning_rate": 2.9998005999456086e-05, |
| "loss": 0.0, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.011972673569844187, |
| "grad_norm": 26.250308990478516, |
| "learning_rate": 2.9997687439507734e-05, |
| "loss": 0.0, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.0127708518078338, |
| "grad_norm": 0.0081906383857131, |
| "learning_rate": 2.9997345285038203e-05, |
| "loss": 0.0, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.013569030045823412, |
| "grad_norm": 0.005343407858163118, |
| "learning_rate": 2.9996979536585784e-05, |
| "loss": 0.0, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.014367208283813025, |
| "grad_norm": 0.004830050282180309, |
| "learning_rate": 2.999659019472588e-05, |
| "loss": 0.0, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.015165386521802637, |
| "grad_norm": 0.005266611464321613, |
| "learning_rate": 2.9996177260070993e-05, |
| "loss": 0.0, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.01596356475979225, |
| "grad_norm": 0.006467476487159729, |
| "learning_rate": 2.999574073327077e-05, |
| "loss": 0.0, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.016761742997781864, |
| "grad_norm": 0.005009402055293322, |
| "learning_rate": 2.9995280615011947e-05, |
| "loss": 0.0, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.017559921235771474, |
| "grad_norm": 0.10507344454526901, |
| "learning_rate": 2.99947969060184e-05, |
| "loss": 0.0, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.01835809947376109, |
| "grad_norm": 0.004155490081757307, |
| "learning_rate": 2.9994289607051097e-05, |
| "loss": 0.0, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.0191562777117507, |
| "grad_norm": 0.005153627134859562, |
| "learning_rate": 2.9993758718908127e-05, |
| "loss": 0.0, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.019954455949740314, |
| "grad_norm": 0.006224519573152065, |
| "learning_rate": 2.9993204242424692e-05, |
| "loss": 0.0, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.020752634187729924, |
| "grad_norm": 0.006426576059311628, |
| "learning_rate": 2.9992626178473094e-05, |
| "loss": 0.0, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.02155081242571954, |
| "grad_norm": 0.004561313893646002, |
| "learning_rate": 2.9992024527962764e-05, |
| "loss": 0.0, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.02234899066370915, |
| "grad_norm": 0.004213905427604914, |
| "learning_rate": 2.9991399291840204e-05, |
| "loss": 0.0, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.023147168901698763, |
| "grad_norm": 11.763763427734375, |
| "learning_rate": 2.9990750471089053e-05, |
| "loss": 0.0, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.023945347139688374, |
| "grad_norm": 3612.91552734375, |
| "learning_rate": 2.9990078066730048e-05, |
| "loss": 0.0, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.02474352537767799, |
| "grad_norm": 0.0035761911422014236, |
| "learning_rate": 2.9989382079821016e-05, |
| "loss": 0.0, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.0255417036156676, |
| "grad_norm": 0.00994983222335577, |
| "learning_rate": 2.998866251145689e-05, |
| "loss": 0.0, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.026339881853657213, |
| "grad_norm": 1838.4853515625, |
| "learning_rate": 2.9987919362769707e-05, |
| "loss": 0.0, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.027138060091646824, |
| "grad_norm": 980.426513671875, |
| "learning_rate": 2.998715263492859e-05, |
| "loss": 0.0, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.02793623832963644, |
| "grad_norm": 0.005308611784130335, |
| "learning_rate": 2.9986362329139772e-05, |
| "loss": 0.0, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.02873441656762605, |
| "grad_norm": 0.010192105546593666, |
| "learning_rate": 2.9985548446646566e-05, |
| "loss": 0.0, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.029532594805615663, |
| "grad_norm": 235.67745971679688, |
| "learning_rate": 2.9984710988729377e-05, |
| "loss": 0.0, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.030330773043605274, |
| "grad_norm": 0.07810617983341217, |
| "learning_rate": 2.9983849956705706e-05, |
| "loss": 0.0, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.031128951281594888, |
| "grad_norm": 0.008812849409878254, |
| "learning_rate": 2.9982965351930143e-05, |
| "loss": 0.0, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.0319271295195845, |
| "grad_norm": 0.0035495434422045946, |
| "learning_rate": 2.9982057175794347e-05, |
| "loss": 0.0, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.03272530775757411, |
| "grad_norm": 59.26214599609375, |
| "learning_rate": 2.998112542972708e-05, |
| "loss": 0.0, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.03352348599556373, |
| "grad_norm": 2.114682674407959, |
| "learning_rate": 2.9980170115194166e-05, |
| "loss": 0.0, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.03432166423355334, |
| "grad_norm": 0.00623566098511219, |
| "learning_rate": 2.9979191233698526e-05, |
| "loss": 0.0, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.03511984247154295, |
| "grad_norm": 0.7668495774269104, |
| "learning_rate": 2.9978188786780137e-05, |
| "loss": 0.0, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.03591802070953256, |
| "grad_norm": 0.009474883787333965, |
| "learning_rate": 2.997716277601607e-05, |
| "loss": 0.0, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.03671619894752218, |
| "grad_norm": 0.0059985085390508175, |
| "learning_rate": 2.997611320302044e-05, |
| "loss": 0.0, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.03751437718551179, |
| "grad_norm": 0.0021645475644618273, |
| "learning_rate": 2.9975040069444463e-05, |
| "loss": 0.0, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.0383125554235014, |
| "grad_norm": 0.0018036281690001488, |
| "learning_rate": 2.9973943376976393e-05, |
| "loss": 0.0, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.03911073366149101, |
| "grad_norm": 740.995849609375, |
| "learning_rate": 2.9972823127341566e-05, |
| "loss": 0.0, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.03990891189948063, |
| "grad_norm": 0.11744941025972366, |
| "learning_rate": 2.9971679322302368e-05, |
| "loss": 0.0, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.04070709013747024, |
| "grad_norm": 0.014604251831769943, |
| "learning_rate": 2.997051196365824e-05, |
| "loss": 0.0, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.04150526837545985, |
| "grad_norm": 0.0021100931335240602, |
| "learning_rate": 2.996932105324569e-05, |
| "loss": 0.0001, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.042303446613449466, |
| "grad_norm": 747.1854858398438, |
| "learning_rate": 2.9968106592938267e-05, |
| "loss": 0.0, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.04310162485143908, |
| "grad_norm": 0.02324901707470417, |
| "learning_rate": 2.9966868584646574e-05, |
| "loss": 0.0, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.04389980308942869, |
| "grad_norm": 0.00435708649456501, |
| "learning_rate": 2.996560703031826e-05, |
| "loss": 0.0, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.0446979813274183, |
| "grad_norm": 0.002938190009444952, |
| "learning_rate": 2.9964321931938013e-05, |
| "loss": 0.0, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.045496159565407916, |
| "grad_norm": 0.0124465087428689, |
| "learning_rate": 2.9963013291527564e-05, |
| "loss": 0.0, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.04629433780339753, |
| "grad_norm": 1176.4481201171875, |
| "learning_rate": 2.996168111114568e-05, |
| "loss": 0.0, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.04709251604138714, |
| "grad_norm": 0.0027969577349722385, |
| "learning_rate": 2.996032539288817e-05, |
| "loss": 0.0, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.04789069427937675, |
| "grad_norm": 0.006387583911418915, |
| "learning_rate": 2.9958946138887848e-05, |
| "loss": 0.0, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.048688872517366366, |
| "grad_norm": 0.0018514322582632303, |
| "learning_rate": 2.9957543351314586e-05, |
| "loss": 0.0, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.04948705075535598, |
| "grad_norm": 0.002022047759965062, |
| "learning_rate": 2.9956117032375252e-05, |
| "loss": 0.0, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.05028522899334559, |
| "grad_norm": 0.0020059312228113413, |
| "learning_rate": 2.9954667184313755e-05, |
| "loss": 0.0, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.0510834072313352, |
| "grad_norm": 0.0016115437028929591, |
| "learning_rate": 2.995319380941101e-05, |
| "loss": 0.0, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.051881585469324816, |
| "grad_norm": 0.020331773906946182, |
| "learning_rate": 2.995169690998494e-05, |
| "loss": 0.0, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.05267976370731443, |
| "grad_norm": 1381.0771484375, |
| "learning_rate": 2.995017648839049e-05, |
| "loss": 0.0, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.05347794194530404, |
| "grad_norm": 235.88551330566406, |
| "learning_rate": 2.9948632547019604e-05, |
| "loss": 0.0, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.05427612018329365, |
| "grad_norm": 0.00871993601322174, |
| "learning_rate": 2.994706508830122e-05, |
| "loss": 0.0, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.055074298421283266, |
| "grad_norm": 0.036717597395181656, |
| "learning_rate": 2.9945474114701287e-05, |
| "loss": 0.0, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.05587247665927288, |
| "grad_norm": 21.093196868896484, |
| "learning_rate": 2.994385962872274e-05, |
| "loss": 0.0, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.05667065489726249, |
| "grad_norm": 0.05062306299805641, |
| "learning_rate": 2.99422216329055e-05, |
| "loss": 0.0, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.0574688331352521, |
| "grad_norm": 0.0021120295859873295, |
| "learning_rate": 2.9940560129826485e-05, |
| "loss": 0.0, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.058267011373241716, |
| "grad_norm": 0.03065457195043564, |
| "learning_rate": 2.993887512209959e-05, |
| "loss": 0.0, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.05906518961123133, |
| "grad_norm": 2.3146629333496094, |
| "learning_rate": 2.9937166612375685e-05, |
| "loss": 0.0, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.05986336784922094, |
| "grad_norm": 7.780862331390381, |
| "learning_rate": 2.9935434603342616e-05, |
| "loss": 0.0, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.06066154608721055, |
| "grad_norm": 0.0014545947778970003, |
| "learning_rate": 2.99336790977252e-05, |
| "loss": 0.0, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.061459724325200166, |
| "grad_norm": 0.6645253896713257, |
| "learning_rate": 2.9931900098285214e-05, |
| "loss": 0.0, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.062257902563189776, |
| "grad_norm": 0.0018458861159160733, |
| "learning_rate": 2.99300976078214e-05, |
| "loss": 0.0001, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.06305608080117939, |
| "grad_norm": 103.37657165527344, |
| "learning_rate": 2.9928271629169453e-05, |
| "loss": 0.0, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.063854259039169, |
| "grad_norm": 6.414219856262207, |
| "learning_rate": 2.9926422165202025e-05, |
| "loss": 0.0, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.06465243727715861, |
| "grad_norm": 0.0033696407917886972, |
| "learning_rate": 2.992454921882871e-05, |
| "loss": 0.0, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.06545061551514822, |
| "grad_norm": 0.001432748162187636, |
| "learning_rate": 2.9922652792996056e-05, |
| "loss": 0.0, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.06624879375313784, |
| "grad_norm": 0.002302892506122589, |
| "learning_rate": 2.9920732890687525e-05, |
| "loss": 0.0, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.06704697199112745, |
| "grad_norm": 0.011892186477780342, |
| "learning_rate": 2.9918789514923535e-05, |
| "loss": 0.0, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.06784515022911707, |
| "grad_norm": 0.0011868340661749244, |
| "learning_rate": 2.991682266876143e-05, |
| "loss": 0.0, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.06864332846710668, |
| "grad_norm": 0.0014516868395730853, |
| "learning_rate": 2.9914832355295472e-05, |
| "loss": 0.0, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.06944150670509629, |
| "grad_norm": 0.0018224489176645875, |
| "learning_rate": 2.9912818577656834e-05, |
| "loss": 0.0, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.0702396849430859, |
| "grad_norm": 0.002470798557624221, |
| "learning_rate": 2.991078133901362e-05, |
| "loss": 0.0, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.07103786318107551, |
| "grad_norm": 0.0014030230231583118, |
| "learning_rate": 2.9908720642570837e-05, |
| "loss": 0.0, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.07183604141906512, |
| "grad_norm": 0.002383199753239751, |
| "learning_rate": 2.9906636491570395e-05, |
| "loss": 0.0, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.07263421965705474, |
| "grad_norm": 0.0012940465239807963, |
| "learning_rate": 2.9904528889291094e-05, |
| "loss": 0.0, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.07343239789504435, |
| "grad_norm": 0.001975142164155841, |
| "learning_rate": 2.9902397839048644e-05, |
| "loss": 0.0, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.07423057613303397, |
| "grad_norm": 0.0015532145043835044, |
| "learning_rate": 2.990024334419563e-05, |
| "loss": 0.0, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.07502875437102358, |
| "grad_norm": 0.013329227454960346, |
| "learning_rate": 2.989806540812153e-05, |
| "loss": 0.0, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.07582693260901319, |
| "grad_norm": 0.0020695908460766077, |
| "learning_rate": 2.98958640342527e-05, |
| "loss": 0.0, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.0766251108470028, |
| "grad_norm": 206.78863525390625, |
| "learning_rate": 2.9893639226052356e-05, |
| "loss": 0.0, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.07742328908499241, |
| "grad_norm": 0.018143679946660995, |
| "learning_rate": 2.98913909870206e-05, |
| "loss": 0.0, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.07822146732298202, |
| "grad_norm": 0.014422932639718056, |
| "learning_rate": 2.988911932069438e-05, |
| "loss": 0.0, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.07901964556097164, |
| "grad_norm": 0.0015423446893692017, |
| "learning_rate": 2.98868242306475e-05, |
| "loss": 0.0, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.07981782379896125, |
| "grad_norm": 2.980130434036255, |
| "learning_rate": 2.9884505720490628e-05, |
| "loss": 0.0, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.08061600203695087, |
| "grad_norm": 0.006626432295888662, |
| "learning_rate": 2.9882163793871268e-05, |
| "loss": 0.0, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.08141418027494048, |
| "grad_norm": 0.0019104316597804427, |
| "learning_rate": 2.987979845447376e-05, |
| "loss": 0.0, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.08221235851293009, |
| "grad_norm": 0.006668619811534882, |
| "learning_rate": 2.9877409706019286e-05, |
| "loss": 0.0, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.0830105367509197, |
| "grad_norm": 0.001458686776459217, |
| "learning_rate": 2.9874997552265847e-05, |
| "loss": 0.0, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.08380871498890931, |
| "grad_norm": 2472.781005859375, |
| "learning_rate": 2.9872561997008265e-05, |
| "loss": 0.0, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.08460689322689893, |
| "grad_norm": 0.0012460598954930902, |
| "learning_rate": 2.987010304407819e-05, |
| "loss": 0.0, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.08540507146488854, |
| "grad_norm": 0.0019219612004235387, |
| "learning_rate": 2.9867620697344072e-05, |
| "loss": 0.0, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.08620324970287815, |
| "grad_norm": 0.002554278587922454, |
| "learning_rate": 2.986511496071116e-05, |
| "loss": 0.0, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.08700142794086776, |
| "grad_norm": 0.039569783955812454, |
| "learning_rate": 2.9862585838121507e-05, |
| "loss": 0.0, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.08779960617885738, |
| "grad_norm": 0.0019195139175280929, |
| "learning_rate": 2.9860033333553957e-05, |
| "loss": 0.0, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.08859778441684699, |
| "grad_norm": 0.002144334837794304, |
| "learning_rate": 2.985745745102414e-05, |
| "loss": 0.0, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.0893959626548366, |
| "grad_norm": 0.0011981218121945858, |
| "learning_rate": 2.985485819458445e-05, |
| "loss": 0.0, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.09019414089282621, |
| "grad_norm": 0.0018990024691447616, |
| "learning_rate": 2.985223556832408e-05, |
| "loss": 0.0, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.09099231913081583, |
| "grad_norm": 0.0011976829264312983, |
| "learning_rate": 2.9849589576368962e-05, |
| "loss": 0.0, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.09179049736880544, |
| "grad_norm": 0.776136577129364, |
| "learning_rate": 2.9846920222881807e-05, |
| "loss": 0.0, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.09258867560679505, |
| "grad_norm": 6463.4013671875, |
| "learning_rate": 2.984422751206206e-05, |
| "loss": 0.0, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.09338685384478466, |
| "grad_norm": 3361.28564453125, |
| "learning_rate": 2.9841511448145938e-05, |
| "loss": 0.0, |
| "step": 58500 |
| }, |
| { |
| "epoch": 0.09418503208277428, |
| "grad_norm": 0.0008196167182177305, |
| "learning_rate": 2.9838772035406367e-05, |
| "loss": 0.0, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.09498321032076389, |
| "grad_norm": 0.004590105731040239, |
| "learning_rate": 2.9836009278153024e-05, |
| "loss": 0.0, |
| "step": 59500 |
| }, |
| { |
| "epoch": 0.0957813885587535, |
| "grad_norm": 0.002218448556959629, |
| "learning_rate": 2.9833223180732315e-05, |
| "loss": 0.0, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.09657956679674311, |
| "grad_norm": 0.0009515349520370364, |
| "learning_rate": 2.9830413747527352e-05, |
| "loss": 0.0, |
| "step": 60500 |
| }, |
| { |
| "epoch": 0.09737774503473273, |
| "grad_norm": 0.0013340599834918976, |
| "learning_rate": 2.982758098295796e-05, |
| "loss": 0.0, |
| "step": 61000 |
| }, |
| { |
| "epoch": 0.09817592327272234, |
| "grad_norm": 0.0008279486792162061, |
| "learning_rate": 2.9824724891480688e-05, |
| "loss": 0.0, |
| "step": 61500 |
| }, |
| { |
| "epoch": 0.09897410151071195, |
| "grad_norm": 0.002933940151706338, |
| "learning_rate": 2.9821845477588752e-05, |
| "loss": 0.0, |
| "step": 62000 |
| }, |
| { |
| "epoch": 0.09977227974870156, |
| "grad_norm": 0.0022352703381329775, |
| "learning_rate": 2.98189427458121e-05, |
| "loss": 0.0, |
| "step": 62500 |
| }, |
| { |
| "epoch": 0.10057045798669118, |
| "grad_norm": 0.0006310699391178787, |
| "learning_rate": 2.9816016700717314e-05, |
| "loss": 0.0, |
| "step": 63000 |
| }, |
| { |
| "epoch": 0.10136863622468079, |
| "grad_norm": 0.0008447402506135404, |
| "learning_rate": 2.9813067346907694e-05, |
| "loss": 0.0, |
| "step": 63500 |
| }, |
| { |
| "epoch": 0.1021668144626704, |
| "grad_norm": 0.001062876428477466, |
| "learning_rate": 2.9810094689023198e-05, |
| "loss": 0.0, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.10296499270066001, |
| "grad_norm": 0.0022323522716760635, |
| "learning_rate": 2.9807098731740432e-05, |
| "loss": 0.0, |
| "step": 64500 |
| }, |
| { |
| "epoch": 0.10376317093864963, |
| "grad_norm": 0.0006886335904709995, |
| "learning_rate": 2.980407947977268e-05, |
| "loss": 0.0, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.10456134917663924, |
| "grad_norm": 0.001781968749128282, |
| "learning_rate": 2.9801036937869845e-05, |
| "loss": 0.0, |
| "step": 65500 |
| }, |
| { |
| "epoch": 0.10535952741462885, |
| "grad_norm": 0.0010141967795789242, |
| "learning_rate": 2.9797971110818502e-05, |
| "loss": 0.0, |
| "step": 66000 |
| }, |
| { |
| "epoch": 0.10615770565261846, |
| "grad_norm": 0.0032014320604503155, |
| "learning_rate": 2.979488200344184e-05, |
| "loss": 0.0, |
| "step": 66500 |
| }, |
| { |
| "epoch": 0.10695588389060807, |
| "grad_norm": 766.4085693359375, |
| "learning_rate": 2.9791769620599665e-05, |
| "loss": 0.0, |
| "step": 67000 |
| }, |
| { |
| "epoch": 0.10775406212859769, |
| "grad_norm": 0.0012145901564508677, |
| "learning_rate": 2.978863396718842e-05, |
| "loss": 0.0, |
| "step": 67500 |
| }, |
| { |
| "epoch": 0.1085522403665873, |
| "grad_norm": 0.005595668219029903, |
| "learning_rate": 2.9785475048141148e-05, |
| "loss": 0.0, |
| "step": 68000 |
| }, |
| { |
| "epoch": 0.10935041860457691, |
| "grad_norm": 0.0015825449954718351, |
| "learning_rate": 2.9782292868427488e-05, |
| "loss": 0.0, |
| "step": 68500 |
| }, |
| { |
| "epoch": 0.11014859684256653, |
| "grad_norm": 0.00758874136954546, |
| "learning_rate": 2.9779087433053687e-05, |
| "loss": 0.0, |
| "step": 69000 |
| }, |
| { |
| "epoch": 0.11094677508055614, |
| "grad_norm": 1.4108655452728271, |
| "learning_rate": 2.9775858747062564e-05, |
| "loss": 0.0, |
| "step": 69500 |
| }, |
| { |
| "epoch": 0.11174495331854575, |
| "grad_norm": 0.0010229300241917372, |
| "learning_rate": 2.9772606815533523e-05, |
| "loss": 0.0, |
| "step": 70000 |
| }, |
| { |
| "epoch": 0.11254313155653536, |
| "grad_norm": 0.12247798591852188, |
| "learning_rate": 2.9769331643582543e-05, |
| "loss": 0.0, |
| "step": 70500 |
| }, |
| { |
| "epoch": 0.11334130979452497, |
| "grad_norm": 0.0008451018366031349, |
| "learning_rate": 2.9766033236362148e-05, |
| "loss": 0.0, |
| "step": 71000 |
| }, |
| { |
| "epoch": 0.11413948803251459, |
| "grad_norm": 0.000856923230458051, |
| "learning_rate": 2.9762711599061435e-05, |
| "loss": 0.0, |
| "step": 71500 |
| }, |
| { |
| "epoch": 0.1149376662705042, |
| "grad_norm": 0.0016039250185713172, |
| "learning_rate": 2.9759366736906045e-05, |
| "loss": 0.0, |
| "step": 72000 |
| }, |
| { |
| "epoch": 0.11573584450849381, |
| "grad_norm": 40.93400955200195, |
| "learning_rate": 2.9755998655158137e-05, |
| "loss": 0.0, |
| "step": 72500 |
| }, |
| { |
| "epoch": 0.11653402274648343, |
| "grad_norm": 0.000588159600738436, |
| "learning_rate": 2.9752607359116423e-05, |
| "loss": 0.0, |
| "step": 73000 |
| }, |
| { |
| "epoch": 0.11733220098447304, |
| "grad_norm": 127.08687591552734, |
| "learning_rate": 2.974919285411612e-05, |
| "loss": 0.0, |
| "step": 73500 |
| }, |
| { |
| "epoch": 0.11813037922246265, |
| "grad_norm": 0.001267329789698124, |
| "learning_rate": 2.9745755145528964e-05, |
| "loss": 0.0, |
| "step": 74000 |
| }, |
| { |
| "epoch": 0.11892855746045226, |
| "grad_norm": 0.0433398075401783, |
| "learning_rate": 2.97422942387632e-05, |
| "loss": 0.0, |
| "step": 74500 |
| }, |
| { |
| "epoch": 0.11972673569844187, |
| "grad_norm": 0.02973037399351597, |
| "learning_rate": 2.973881013926356e-05, |
| "loss": 0.0, |
| "step": 75000 |
| }, |
| { |
| "epoch": 0.12052491393643149, |
| "grad_norm": 0.0013027731329202652, |
| "learning_rate": 2.9735302852511267e-05, |
| "loss": 0.0, |
| "step": 75500 |
| }, |
| { |
| "epoch": 0.1213230921744211, |
| "grad_norm": 0.018169229850172997, |
| "learning_rate": 2.9731772384024015e-05, |
| "loss": 0.0, |
| "step": 76000 |
| }, |
| { |
| "epoch": 0.12212127041241072, |
| "grad_norm": 0.0007211797637864947, |
| "learning_rate": 2.9728218739355988e-05, |
| "loss": 0.0, |
| "step": 76500 |
| }, |
| { |
| "epoch": 0.12291944865040033, |
| "grad_norm": 22.20551109313965, |
| "learning_rate": 2.972464192409781e-05, |
| "loss": 0.0, |
| "step": 77000 |
| }, |
| { |
| "epoch": 0.12371762688838994, |
| "grad_norm": 0.001930927624925971, |
| "learning_rate": 2.972104194387656e-05, |
| "loss": 0.0, |
| "step": 77500 |
| }, |
| { |
| "epoch": 0.12451580512637955, |
| "grad_norm": 0.0008476130315102637, |
| "learning_rate": 2.9717418804355775e-05, |
| "loss": 0.0, |
| "step": 78000 |
| }, |
| { |
| "epoch": 0.12531398336436916, |
| "grad_norm": 0.016626249998807907, |
| "learning_rate": 2.9713772511235406e-05, |
| "loss": 0.0, |
| "step": 78500 |
| }, |
| { |
| "epoch": 0.12611216160235877, |
| "grad_norm": 0.001898073242045939, |
| "learning_rate": 2.971010307025185e-05, |
| "loss": 0.0, |
| "step": 79000 |
| }, |
| { |
| "epoch": 0.12691033984034839, |
| "grad_norm": 0.001200891681946814, |
| "learning_rate": 2.9706410487177906e-05, |
| "loss": 0.0, |
| "step": 79500 |
| }, |
| { |
| "epoch": 0.127708518078338, |
| "grad_norm": 0.009901273995637894, |
| "learning_rate": 2.970269476782278e-05, |
| "loss": 0.0, |
| "step": 80000 |
| }, |
| { |
| "epoch": 0.1285066963163276, |
| "grad_norm": 0.013532022945582867, |
| "learning_rate": 2.969895591803209e-05, |
| "loss": 0.0, |
| "step": 80500 |
| }, |
| { |
| "epoch": 0.12930487455431722, |
| "grad_norm": 0.0005334099405445158, |
| "learning_rate": 2.9695193943687834e-05, |
| "loss": 0.0, |
| "step": 81000 |
| }, |
| { |
| "epoch": 0.13010305279230683, |
| "grad_norm": 0.003070188919082284, |
| "learning_rate": 2.9691408850708383e-05, |
| "loss": 0.0, |
| "step": 81500 |
| }, |
| { |
| "epoch": 0.13090123103029644, |
| "grad_norm": 0.0006594851147383451, |
| "learning_rate": 2.9687600645048488e-05, |
| "loss": 0.0, |
| "step": 82000 |
| }, |
| { |
| "epoch": 0.13169940926828608, |
| "grad_norm": 0.0006931771640665829, |
| "learning_rate": 2.9683769332699262e-05, |
| "loss": 0.0, |
| "step": 82500 |
| }, |
| { |
| "epoch": 0.1324975875062757, |
| "grad_norm": 0.0007057678885757923, |
| "learning_rate": 2.967991491968816e-05, |
| "loss": 0.0, |
| "step": 83000 |
| }, |
| { |
| "epoch": 0.1332957657442653, |
| "grad_norm": 0.022385986521840096, |
| "learning_rate": 2.967603741207899e-05, |
| "loss": 0.0, |
| "step": 83500 |
| }, |
| { |
| "epoch": 0.1340939439822549, |
| "grad_norm": 0.00055215775500983, |
| "learning_rate": 2.9672136815971892e-05, |
| "loss": 0.0, |
| "step": 84000 |
| }, |
| { |
| "epoch": 0.13489212222024452, |
| "grad_norm": 0.041440509259700775, |
| "learning_rate": 2.9668213137503318e-05, |
| "loss": 0.0, |
| "step": 84500 |
| }, |
| { |
| "epoch": 0.13569030045823413, |
| "grad_norm": 0.0025042875204235315, |
| "learning_rate": 2.966426638284604e-05, |
| "loss": 0.0, |
| "step": 85000 |
| }, |
| { |
| "epoch": 0.13648847869622374, |
| "grad_norm": 0.0006895024562254548, |
| "learning_rate": 2.9660296558209146e-05, |
| "loss": 0.0, |
| "step": 85500 |
| }, |
| { |
| "epoch": 0.13728665693421335, |
| "grad_norm": 0.0008338299230672419, |
| "learning_rate": 2.9656303669837992e-05, |
| "loss": 0.0, |
| "step": 86000 |
| }, |
| { |
| "epoch": 0.13808483517220296, |
| "grad_norm": 0.0018846240127459168, |
| "learning_rate": 2.965228772401424e-05, |
| "loss": 0.0, |
| "step": 86500 |
| }, |
| { |
| "epoch": 0.13888301341019257, |
| "grad_norm": 0.0004423022910486907, |
| "learning_rate": 2.9648248727055812e-05, |
| "loss": 0.0, |
| "step": 87000 |
| }, |
| { |
| "epoch": 0.13968119164818218, |
| "grad_norm": 0.0012980562169104815, |
| "learning_rate": 2.964418668531691e-05, |
| "loss": 0.0, |
| "step": 87500 |
| }, |
| { |
| "epoch": 0.1404793698861718, |
| "grad_norm": 0.0038005076348781586, |
| "learning_rate": 2.964010160518798e-05, |
| "loss": 0.0, |
| "step": 88000 |
| }, |
| { |
| "epoch": 0.1412775481241614, |
| "grad_norm": 0.0011083179851993918, |
| "learning_rate": 2.9635993493095707e-05, |
| "loss": 0.0, |
| "step": 88500 |
| }, |
| { |
| "epoch": 0.14207572636215102, |
| "grad_norm": 0.28431451320648193, |
| "learning_rate": 2.963186235550302e-05, |
| "loss": 0.0, |
| "step": 89000 |
| }, |
| { |
| "epoch": 0.14287390460014063, |
| "grad_norm": 0.0007884473307058215, |
| "learning_rate": 2.962770819890907e-05, |
| "loss": 0.0, |
| "step": 89500 |
| }, |
| { |
| "epoch": 0.14367208283813024, |
| "grad_norm": 0.0057783485390245914, |
| "learning_rate": 2.9623531029849214e-05, |
| "loss": 0.0, |
| "step": 90000 |
| }, |
| { |
| "epoch": 0.14447026107611988, |
| "grad_norm": 9626.4208984375, |
| "learning_rate": 2.961933085489503e-05, |
| "loss": 0.0, |
| "step": 90500 |
| }, |
| { |
| "epoch": 0.1452684393141095, |
| "grad_norm": 0.06994491070508957, |
| "learning_rate": 2.961510768065427e-05, |
| "loss": 0.0, |
| "step": 91000 |
| }, |
| { |
| "epoch": 0.1460666175520991, |
| "grad_norm": 0.0010994599433615804, |
| "learning_rate": 2.9610861513770875e-05, |
| "loss": 0.0, |
| "step": 91500 |
| }, |
| { |
| "epoch": 0.1468647957900887, |
| "grad_norm": 0.0005376485059969127, |
| "learning_rate": 2.9606592360924967e-05, |
| "loss": 0.0, |
| "step": 92000 |
| }, |
| { |
| "epoch": 0.14766297402807832, |
| "grad_norm": 0.005438265856355429, |
| "learning_rate": 2.9602300228832815e-05, |
| "loss": 0.0, |
| "step": 92500 |
| }, |
| { |
| "epoch": 0.14846115226606793, |
| "grad_norm": 0.002110505709424615, |
| "learning_rate": 2.9597985124246854e-05, |
| "loss": 0.0, |
| "step": 93000 |
| }, |
| { |
| "epoch": 0.14925933050405754, |
| "grad_norm": 0.0004619982501026243, |
| "learning_rate": 2.959364705395565e-05, |
| "loss": 0.0, |
| "step": 93500 |
| }, |
| { |
| "epoch": 0.15005750874204715, |
| "grad_norm": 0.0009718042565509677, |
| "learning_rate": 2.958928602478389e-05, |
| "loss": 0.0, |
| "step": 94000 |
| }, |
| { |
| "epoch": 0.15085568698003676, |
| "grad_norm": 0.004765130113810301, |
| "learning_rate": 2.9584902043592412e-05, |
| "loss": 0.0, |
| "step": 94500 |
| }, |
| { |
| "epoch": 0.15165386521802637, |
| "grad_norm": 0.19533102214336395, |
| "learning_rate": 2.9580495117278124e-05, |
| "loss": 0.0, |
| "step": 95000 |
| }, |
| { |
| "epoch": 0.15245204345601598, |
| "grad_norm": 1262.41259765625, |
| "learning_rate": 2.9576065252774063e-05, |
| "loss": 0.0, |
| "step": 95500 |
| }, |
| { |
| "epoch": 0.1532502216940056, |
| "grad_norm": 0.001484275097027421, |
| "learning_rate": 2.957161245704933e-05, |
| "loss": 0.0, |
| "step": 96000 |
| }, |
| { |
| "epoch": 0.1540483999319952, |
| "grad_norm": 0.0024847572203725576, |
| "learning_rate": 2.9567136737109106e-05, |
| "loss": 0.0, |
| "step": 96500 |
| }, |
| { |
| "epoch": 0.15484657816998482, |
| "grad_norm": 0.0015979851596057415, |
| "learning_rate": 2.9562638099994656e-05, |
| "loss": 0.0, |
| "step": 97000 |
| }, |
| { |
| "epoch": 0.15564475640797443, |
| "grad_norm": 0.005389617756009102, |
| "learning_rate": 2.9558116552783274e-05, |
| "loss": 0.0, |
| "step": 97500 |
| }, |
| { |
| "epoch": 0.15644293464596404, |
| "grad_norm": 0.0015855624806135893, |
| "learning_rate": 2.9553572102588305e-05, |
| "loss": 0.0, |
| "step": 98000 |
| }, |
| { |
| "epoch": 0.15724111288395368, |
| "grad_norm": 0.0018055408727377653, |
| "learning_rate": 2.954900475655913e-05, |
| "loss": 0.0, |
| "step": 98500 |
| }, |
| { |
| "epoch": 0.1580392911219433, |
| "grad_norm": 0.0006732465699315071, |
| "learning_rate": 2.954441452188115e-05, |
| "loss": 0.0, |
| "step": 99000 |
| }, |
| { |
| "epoch": 0.1588374693599329, |
| "grad_norm": 0.0014587478945031762, |
| "learning_rate": 2.953980140577576e-05, |
| "loss": 0.0, |
| "step": 99500 |
| }, |
| { |
| "epoch": 0.1596356475979225, |
| "grad_norm": 0.1299201250076294, |
| "learning_rate": 2.953516541550037e-05, |
| "loss": 0.0, |
| "step": 100000 |
| }, |
| { |
| "epoch": 0.16043382583591212, |
| "grad_norm": 0.0025660579558461905, |
| "learning_rate": 2.9530506558348375e-05, |
| "loss": 0.0, |
| "step": 100500 |
| }, |
| { |
| "epoch": 0.16123200407390173, |
| "grad_norm": 0.0009547541849315166, |
| "learning_rate": 2.952582484164912e-05, |
| "loss": 0.0, |
| "step": 101000 |
| }, |
| { |
| "epoch": 0.16203018231189134, |
| "grad_norm": 0.000839733867906034, |
| "learning_rate": 2.952112027276796e-05, |
| "loss": 0.0, |
| "step": 101500 |
| }, |
| { |
| "epoch": 0.16282836054988095, |
| "grad_norm": 0.0009094159468077123, |
| "learning_rate": 2.9516392859106144e-05, |
| "loss": 0.0, |
| "step": 102000 |
| }, |
| { |
| "epoch": 0.16362653878787056, |
| "grad_norm": 0.0008817181806080043, |
| "learning_rate": 2.9511642608100906e-05, |
| "loss": 0.0, |
| "step": 102500 |
| }, |
| { |
| "epoch": 0.16442471702586017, |
| "grad_norm": 0.0047252182848751545, |
| "learning_rate": 2.9506869527225387e-05, |
| "loss": 0.0, |
| "step": 103000 |
| }, |
| { |
| "epoch": 0.16522289526384978, |
| "grad_norm": 0.000495503016281873, |
| "learning_rate": 2.9502073623988646e-05, |
| "loss": 0.0, |
| "step": 103500 |
| }, |
| { |
| "epoch": 0.1660210735018394, |
| "grad_norm": 0.0009167164098471403, |
| "learning_rate": 2.9497254905935656e-05, |
| "loss": 0.0, |
| "step": 104000 |
| }, |
| { |
| "epoch": 0.166819251739829, |
| "grad_norm": 0.0006542898481711745, |
| "learning_rate": 2.949241338064727e-05, |
| "loss": 0.0, |
| "step": 104500 |
| }, |
| { |
| "epoch": 0.16761742997781862, |
| "grad_norm": 0.008757367730140686, |
| "learning_rate": 2.948754905574023e-05, |
| "loss": 0.0, |
| "step": 105000 |
| }, |
| { |
| "epoch": 0.16841560821580823, |
| "grad_norm": 0.0005742495995946229, |
| "learning_rate": 2.9482661938867136e-05, |
| "loss": 0.0, |
| "step": 105500 |
| }, |
| { |
| "epoch": 0.16921378645379787, |
| "grad_norm": 0.0007464766968041658, |
| "learning_rate": 2.947775203771646e-05, |
| "loss": 0.0, |
| "step": 106000 |
| }, |
| { |
| "epoch": 0.17001196469178748, |
| "grad_norm": 0.0006082553300075233, |
| "learning_rate": 2.947281936001251e-05, |
| "loss": 0.0, |
| "step": 106500 |
| }, |
| { |
| "epoch": 0.1708101429297771, |
| "grad_norm": 0.0015842883149161935, |
| "learning_rate": 2.9467863913515423e-05, |
| "loss": 0.0, |
| "step": 107000 |
| }, |
| { |
| "epoch": 0.1716083211677667, |
| "grad_norm": 0.00969216413795948, |
| "learning_rate": 2.9462885706021167e-05, |
| "loss": 0.0, |
| "step": 107500 |
| }, |
| { |
| "epoch": 0.1724064994057563, |
| "grad_norm": 0.000725767866242677, |
| "learning_rate": 2.94578847453615e-05, |
| "loss": 0.0, |
| "step": 108000 |
| }, |
| { |
| "epoch": 0.17320467764374592, |
| "grad_norm": 0.0005956355598755181, |
| "learning_rate": 2.9452861039403994e-05, |
| "loss": 0.0, |
| "step": 108500 |
| }, |
| { |
| "epoch": 0.17400285588173553, |
| "grad_norm": 0.0028481916524469852, |
| "learning_rate": 2.9447814596051997e-05, |
| "loss": 0.0, |
| "step": 109000 |
| }, |
| { |
| "epoch": 0.17480103411972514, |
| "grad_norm": 0.0006914939149282873, |
| "learning_rate": 2.9442745423244625e-05, |
| "loss": 0.0, |
| "step": 109500 |
| }, |
| { |
| "epoch": 0.17559921235771475, |
| "grad_norm": 0.004076390527188778, |
| "learning_rate": 2.9437653528956757e-05, |
| "loss": 0.0, |
| "step": 110000 |
| }, |
| { |
| "epoch": 0.17639739059570436, |
| "grad_norm": 0.0007233788492158055, |
| "learning_rate": 2.943253892119901e-05, |
| "loss": 0.0, |
| "step": 110500 |
| }, |
| { |
| "epoch": 0.17719556883369397, |
| "grad_norm": 0.0004554203769657761, |
| "learning_rate": 2.9427401608017744e-05, |
| "loss": 0.0, |
| "step": 111000 |
| }, |
| { |
| "epoch": 0.17799374707168358, |
| "grad_norm": 0.011892233975231647, |
| "learning_rate": 2.9422241597495035e-05, |
| "loss": 0.0, |
| "step": 111500 |
| }, |
| { |
| "epoch": 0.1787919253096732, |
| "grad_norm": 0.0005836607306264341, |
| "learning_rate": 2.9417058897748664e-05, |
| "loss": 0.0, |
| "step": 112000 |
| }, |
| { |
| "epoch": 0.1795901035476628, |
| "grad_norm": 0.0005727341049350798, |
| "learning_rate": 2.941185351693211e-05, |
| "loss": 0.0, |
| "step": 112500 |
| }, |
| { |
| "epoch": 0.18038828178565242, |
| "grad_norm": 0.0004444452642928809, |
| "learning_rate": 2.9406625463234532e-05, |
| "loss": 0.0, |
| "step": 113000 |
| }, |
| { |
| "epoch": 0.18118646002364203, |
| "grad_norm": 0.0015697794733569026, |
| "learning_rate": 2.940137474488076e-05, |
| "loss": 0.0, |
| "step": 113500 |
| }, |
| { |
| "epoch": 0.18198463826163166, |
| "grad_norm": 0.0027179263997823, |
| "learning_rate": 2.9396101370131284e-05, |
| "loss": 0.0, |
| "step": 114000 |
| }, |
| { |
| "epoch": 0.18278281649962128, |
| "grad_norm": 190.7173309326172, |
| "learning_rate": 2.9390805347282225e-05, |
| "loss": 0.0, |
| "step": 114500 |
| }, |
| { |
| "epoch": 0.1835809947376109, |
| "grad_norm": 0.00045203749323263764, |
| "learning_rate": 2.938548668466535e-05, |
| "loss": 0.0, |
| "step": 115000 |
| }, |
| { |
| "epoch": 0.1843791729756005, |
| "grad_norm": 0.0028070039115846157, |
| "learning_rate": 2.938014539064803e-05, |
| "loss": 0.0, |
| "step": 115500 |
| }, |
| { |
| "epoch": 0.1851773512135901, |
| "grad_norm": 0.0007502553053200245, |
| "learning_rate": 2.9374781473633255e-05, |
| "loss": 0.0, |
| "step": 116000 |
| }, |
| { |
| "epoch": 0.18597552945157972, |
| "grad_norm": 0.0008750234264880419, |
| "learning_rate": 2.9369394942059582e-05, |
| "loss": 0.0, |
| "step": 116500 |
| }, |
| { |
| "epoch": 0.18677370768956933, |
| "grad_norm": 0.0004678396799135953, |
| "learning_rate": 2.9363985804401174e-05, |
| "loss": 0.0, |
| "step": 117000 |
| }, |
| { |
| "epoch": 0.18757188592755894, |
| "grad_norm": 0.0006737300427630544, |
| "learning_rate": 2.9358554069167733e-05, |
| "loss": 0.0, |
| "step": 117500 |
| }, |
| { |
| "epoch": 0.18837006416554855, |
| "grad_norm": 0.001020797179080546, |
| "learning_rate": 2.9353099744904527e-05, |
| "loss": 0.0, |
| "step": 118000 |
| }, |
| { |
| "epoch": 0.18916824240353816, |
| "grad_norm": 0.0005353185697458684, |
| "learning_rate": 2.9347622840192353e-05, |
| "loss": 0.0, |
| "step": 118500 |
| }, |
| { |
| "epoch": 0.18996642064152777, |
| "grad_norm": 0.0031409678049385548, |
| "learning_rate": 2.9342123363647542e-05, |
| "loss": 0.0, |
| "step": 119000 |
| }, |
| { |
| "epoch": 0.19076459887951738, |
| "grad_norm": 0.0005457552615553141, |
| "learning_rate": 2.933660132392193e-05, |
| "loss": 0.0, |
| "step": 119500 |
| }, |
| { |
| "epoch": 0.191562777117507, |
| "grad_norm": 0.007242423016577959, |
| "learning_rate": 2.933105672970284e-05, |
| "loss": 0.0, |
| "step": 120000 |
| }, |
| { |
| "epoch": 0.1923609553554966, |
| "grad_norm": 0.0015077221905812621, |
| "learning_rate": 2.9325489589713092e-05, |
| "loss": 0.0, |
| "step": 120500 |
| }, |
| { |
| "epoch": 0.19315913359348622, |
| "grad_norm": 0.000620639999397099, |
| "learning_rate": 2.9319899912710968e-05, |
| "loss": 0.0, |
| "step": 121000 |
| }, |
| { |
| "epoch": 0.19395731183147583, |
| "grad_norm": 0.01805310882627964, |
| "learning_rate": 2.9314287707490208e-05, |
| "loss": 0.0, |
| "step": 121500 |
| }, |
| { |
| "epoch": 0.19475549006946546, |
| "grad_norm": 13.3745698928833, |
| "learning_rate": 2.9308652982879998e-05, |
| "loss": 0.0, |
| "step": 122000 |
| }, |
| { |
| "epoch": 0.19555366830745508, |
| "grad_norm": 0.0002797323395498097, |
| "learning_rate": 2.9302995747744935e-05, |
| "loss": 0.0, |
| "step": 122500 |
| }, |
| { |
| "epoch": 0.19635184654544469, |
| "grad_norm": 0.0012682373635470867, |
| "learning_rate": 2.929731601098505e-05, |
| "loss": 0.0, |
| "step": 123000 |
| }, |
| { |
| "epoch": 0.1971500247834343, |
| "grad_norm": 0.1774342805147171, |
| "learning_rate": 2.9291613781535764e-05, |
| "loss": 0.0, |
| "step": 123500 |
| }, |
| { |
| "epoch": 0.1979482030214239, |
| "grad_norm": 0.0004860001499764621, |
| "learning_rate": 2.928588906836788e-05, |
| "loss": 0.0, |
| "step": 124000 |
| }, |
| { |
| "epoch": 0.19874638125941352, |
| "grad_norm": 0.0010263145668432117, |
| "learning_rate": 2.9280141880487584e-05, |
| "loss": 0.0, |
| "step": 124500 |
| }, |
| { |
| "epoch": 0.19954455949740313, |
| "grad_norm": 0.0008300538174808025, |
| "learning_rate": 2.9274372226936416e-05, |
| "loss": 0.0, |
| "step": 125000 |
| }, |
| { |
| "epoch": 0.20034273773539274, |
| "grad_norm": 0.0008429441950283945, |
| "learning_rate": 2.9268580116791246e-05, |
| "loss": 0.0, |
| "step": 125500 |
| }, |
| { |
| "epoch": 0.20114091597338235, |
| "grad_norm": 0.000568005139939487, |
| "learning_rate": 2.926276555916429e-05, |
| "loss": 0.0, |
| "step": 126000 |
| }, |
| { |
| "epoch": 0.20193909421137196, |
| "grad_norm": 0.00045849403250031173, |
| "learning_rate": 2.9256928563203063e-05, |
| "loss": 0.0, |
| "step": 126500 |
| }, |
| { |
| "epoch": 0.20273727244936157, |
| "grad_norm": 0.001164284534752369, |
| "learning_rate": 2.9251069138090403e-05, |
| "loss": 0.0, |
| "step": 127000 |
| }, |
| { |
| "epoch": 0.20353545068735118, |
| "grad_norm": 20.4660587310791, |
| "learning_rate": 2.924518729304441e-05, |
| "loss": 0.0, |
| "step": 127500 |
| }, |
| { |
| "epoch": 0.2043336289253408, |
| "grad_norm": 0.00039680031477473676, |
| "learning_rate": 2.9239283037318466e-05, |
| "loss": 0.0, |
| "step": 128000 |
| }, |
| { |
| "epoch": 0.2051318071633304, |
| "grad_norm": 0.0023022566456347704, |
| "learning_rate": 2.9233356380201214e-05, |
| "loss": 0.0, |
| "step": 128500 |
| }, |
| { |
| "epoch": 0.20592998540132001, |
| "grad_norm": 0.000906547240447253, |
| "learning_rate": 2.9227407331016532e-05, |
| "loss": 0.0, |
| "step": 129000 |
| }, |
| { |
| "epoch": 0.20672816363930965, |
| "grad_norm": 0.0004909643321298063, |
| "learning_rate": 2.9221435899123522e-05, |
| "loss": 0.0, |
| "step": 129500 |
| }, |
| { |
| "epoch": 0.20752634187729926, |
| "grad_norm": 0.0005696564330719411, |
| "learning_rate": 2.921544209391651e-05, |
| "loss": 0.0, |
| "step": 130000 |
| }, |
| { |
| "epoch": 0.20832452011528887, |
| "grad_norm": 0.0012174558360129595, |
| "learning_rate": 2.920942592482501e-05, |
| "loss": 0.0, |
| "step": 130500 |
| }, |
| { |
| "epoch": 0.20912269835327849, |
| "grad_norm": 0.0008444120176136494, |
| "learning_rate": 2.920338740131373e-05, |
| "loss": 0.0, |
| "step": 131000 |
| }, |
| { |
| "epoch": 0.2099208765912681, |
| "grad_norm": 19.62136459350586, |
| "learning_rate": 2.919732653288253e-05, |
| "loss": 0.0, |
| "step": 131500 |
| }, |
| { |
| "epoch": 0.2107190548292577, |
| "grad_norm": 0.0005330504500307143, |
| "learning_rate": 2.919124332906644e-05, |
| "loss": 0.0, |
| "step": 132000 |
| }, |
| { |
| "epoch": 0.21151723306724732, |
| "grad_norm": 0.0009033152018673718, |
| "learning_rate": 2.9185137799435615e-05, |
| "loss": 0.0, |
| "step": 132500 |
| }, |
| { |
| "epoch": 0.21231541130523693, |
| "grad_norm": 0.000573009136132896, |
| "learning_rate": 2.9179009953595344e-05, |
| "loss": 0.0, |
| "step": 133000 |
| }, |
| { |
| "epoch": 0.21311358954322654, |
| "grad_norm": 0.0006213324377313256, |
| "learning_rate": 2.9172859801186013e-05, |
| "loss": 0.0, |
| "step": 133500 |
| }, |
| { |
| "epoch": 0.21391176778121615, |
| "grad_norm": 0.0007363075274042785, |
| "learning_rate": 2.916668735188312e-05, |
| "loss": 0.0, |
| "step": 134000 |
| }, |
| { |
| "epoch": 0.21470994601920576, |
| "grad_norm": 0.0005618541617877781, |
| "learning_rate": 2.916049261539721e-05, |
| "loss": 0.0, |
| "step": 134500 |
| }, |
| { |
| "epoch": 0.21550812425719537, |
| "grad_norm": 0.006180692929774523, |
| "learning_rate": 2.9154275601473923e-05, |
| "loss": 0.0, |
| "step": 135000 |
| }, |
| { |
| "epoch": 0.21630630249518498, |
| "grad_norm": 0.0006301426910795271, |
| "learning_rate": 2.914803631989392e-05, |
| "loss": 0.0, |
| "step": 135500 |
| }, |
| { |
| "epoch": 0.2171044807331746, |
| "grad_norm": 0.0004460285708773881, |
| "learning_rate": 2.9141774780472914e-05, |
| "loss": 0.0, |
| "step": 136000 |
| }, |
| { |
| "epoch": 0.2179026589711642, |
| "grad_norm": 0.0005381643422879279, |
| "learning_rate": 2.9135490993061626e-05, |
| "loss": 0.0, |
| "step": 136500 |
| }, |
| { |
| "epoch": 0.21870083720915381, |
| "grad_norm": 105.94903564453125, |
| "learning_rate": 2.9129184967545768e-05, |
| "loss": 0.0, |
| "step": 137000 |
| }, |
| { |
| "epoch": 0.21949901544714345, |
| "grad_norm": 0.0006671809242106974, |
| "learning_rate": 2.9122856713846047e-05, |
| "loss": 0.0, |
| "step": 137500 |
| }, |
| { |
| "epoch": 0.22029719368513306, |
| "grad_norm": 0.0009246356203220785, |
| "learning_rate": 2.911650624191815e-05, |
| "loss": 0.0, |
| "step": 138000 |
| }, |
| { |
| "epoch": 0.22109537192312267, |
| "grad_norm": 0.000820090644992888, |
| "learning_rate": 2.9110133561752703e-05, |
| "loss": 0.0, |
| "step": 138500 |
| }, |
| { |
| "epoch": 0.22189355016111229, |
| "grad_norm": 3.0545477867126465, |
| "learning_rate": 2.9103738683375266e-05, |
| "loss": 0.0, |
| "step": 139000 |
| }, |
| { |
| "epoch": 0.2226917283991019, |
| "grad_norm": 0.0010678736725822091, |
| "learning_rate": 2.9097321616846334e-05, |
| "loss": 0.0, |
| "step": 139500 |
| }, |
| { |
| "epoch": 0.2234899066370915, |
| "grad_norm": 0.003939803224056959, |
| "learning_rate": 2.9090882372261308e-05, |
| "loss": 0.0, |
| "step": 140000 |
| }, |
| { |
| "epoch": 0.22428808487508112, |
| "grad_norm": 0.0022800497245043516, |
| "learning_rate": 2.908442095975047e-05, |
| "loss": 0.0, |
| "step": 140500 |
| }, |
| { |
| "epoch": 0.22508626311307073, |
| "grad_norm": 0.000471117120468989, |
| "learning_rate": 2.907793738947899e-05, |
| "loss": 0.0, |
| "step": 141000 |
| }, |
| { |
| "epoch": 0.22588444135106034, |
| "grad_norm": 135.74588012695312, |
| "learning_rate": 2.9071431671646884e-05, |
| "loss": 0.0, |
| "step": 141500 |
| }, |
| { |
| "epoch": 0.22668261958904995, |
| "grad_norm": 0.00035868247505277395, |
| "learning_rate": 2.9064903816489015e-05, |
| "loss": 0.0, |
| "step": 142000 |
| }, |
| { |
| "epoch": 0.22748079782703956, |
| "grad_norm": 0.004526620730757713, |
| "learning_rate": 2.905835383427508e-05, |
| "loss": 0.0, |
| "step": 142500 |
| }, |
| { |
| "epoch": 0.22827897606502917, |
| "grad_norm": 0.01911444030702114, |
| "learning_rate": 2.9051781735309576e-05, |
| "loss": 0.0, |
| "step": 143000 |
| }, |
| { |
| "epoch": 0.22907715430301878, |
| "grad_norm": 3154.1181640625, |
| "learning_rate": 2.9045187529931803e-05, |
| "loss": 0.0, |
| "step": 143500 |
| }, |
| { |
| "epoch": 0.2298753325410084, |
| "grad_norm": 0.00026835728203877807, |
| "learning_rate": 2.903857122851583e-05, |
| "loss": 0.0, |
| "step": 144000 |
| }, |
| { |
| "epoch": 0.230673510778998, |
| "grad_norm": 523.6666259765625, |
| "learning_rate": 2.9031932841470495e-05, |
| "loss": 0.0, |
| "step": 144500 |
| }, |
| { |
| "epoch": 0.23147168901698761, |
| "grad_norm": 0.002406003652140498, |
| "learning_rate": 2.9025272379239383e-05, |
| "loss": 0.0, |
| "step": 145000 |
| }, |
| { |
| "epoch": 0.23226986725497725, |
| "grad_norm": 0.0015471165534108877, |
| "learning_rate": 2.9018589852300794e-05, |
| "loss": 0.0, |
| "step": 145500 |
| }, |
| { |
| "epoch": 0.23306804549296686, |
| "grad_norm": 0.004399681463837624, |
| "learning_rate": 2.901188527116776e-05, |
| "loss": 0.0, |
| "step": 146000 |
| }, |
| { |
| "epoch": 0.23386622373095647, |
| "grad_norm": 15.444055557250977, |
| "learning_rate": 2.9005158646387993e-05, |
| "loss": 0.0, |
| "step": 146500 |
| }, |
| { |
| "epoch": 0.23466440196894608, |
| "grad_norm": 0.018915316089987755, |
| "learning_rate": 2.8998409988543897e-05, |
| "loss": 0.0, |
| "step": 147000 |
| }, |
| { |
| "epoch": 0.2354625802069357, |
| "grad_norm": 57.25000762939453, |
| "learning_rate": 2.8991639308252527e-05, |
| "loss": 0.0, |
| "step": 147500 |
| }, |
| { |
| "epoch": 0.2362607584449253, |
| "grad_norm": 0.0022897564340382814, |
| "learning_rate": 2.8984846616165586e-05, |
| "loss": 0.0, |
| "step": 148000 |
| }, |
| { |
| "epoch": 0.23705893668291492, |
| "grad_norm": 0.00042616488644853234, |
| "learning_rate": 2.8978031922969418e-05, |
| "loss": 0.0, |
| "step": 148500 |
| }, |
| { |
| "epoch": 0.23785711492090453, |
| "grad_norm": 0.01706228218972683, |
| "learning_rate": 2.8971195239384966e-05, |
| "loss": 0.0, |
| "step": 149000 |
| }, |
| { |
| "epoch": 0.23865529315889414, |
| "grad_norm": 0.0007037579198367894, |
| "learning_rate": 2.896433657616777e-05, |
| "loss": 0.0, |
| "step": 149500 |
| }, |
| { |
| "epoch": 0.23945347139688375, |
| "grad_norm": 0.0006432163645513356, |
| "learning_rate": 2.8957455944107963e-05, |
| "loss": 0.0, |
| "step": 150000 |
| }, |
| { |
| "epoch": 0.23945347139688375, |
| "eval_loss": 2.1141684555914253e-05, |
| "eval_runtime": 21663.2369, |
| "eval_samples_per_second": 102.814, |
| "eval_steps_per_second": 3.213, |
| "step": 150000 |
| }, |
| { |
| "epoch": 0.24025164963487336, |
| "grad_norm": 0.000818128464743495, |
| "learning_rate": 2.8950553354030216e-05, |
| "loss": 0.0, |
| "step": 150500 |
| }, |
| { |
| "epoch": 0.24104982787286297, |
| "grad_norm": 186.0622100830078, |
| "learning_rate": 2.894362881679376e-05, |
| "loss": 0.0, |
| "step": 151000 |
| }, |
| { |
| "epoch": 0.24184800611085258, |
| "grad_norm": 1975.7821044921875, |
| "learning_rate": 2.893668234329236e-05, |
| "loss": 0.0, |
| "step": 151500 |
| }, |
| { |
| "epoch": 0.2426461843488422, |
| "grad_norm": 0.00043766028829850256, |
| "learning_rate": 2.892971394445427e-05, |
| "loss": 0.0, |
| "step": 152000 |
| }, |
| { |
| "epoch": 0.2434443625868318, |
| "grad_norm": 0.00039182481123134494, |
| "learning_rate": 2.8922723631242254e-05, |
| "loss": 0.0, |
| "step": 152500 |
| }, |
| { |
| "epoch": 0.24424254082482144, |
| "grad_norm": 0.0010972399031743407, |
| "learning_rate": 2.8915711414653543e-05, |
| "loss": 0.0, |
| "step": 153000 |
| }, |
| { |
| "epoch": 0.24504071906281105, |
| "grad_norm": 0.05904774367809296, |
| "learning_rate": 2.8908677305719836e-05, |
| "loss": 0.0, |
| "step": 153500 |
| }, |
| { |
| "epoch": 0.24583889730080066, |
| "grad_norm": 0.019364451989531517, |
| "learning_rate": 2.890162131550727e-05, |
| "loss": 0.0, |
| "step": 154000 |
| }, |
| { |
| "epoch": 0.24663707553879027, |
| "grad_norm": 0.000549559888895601, |
| "learning_rate": 2.8894543455116397e-05, |
| "loss": 0.0, |
| "step": 154500 |
| }, |
| { |
| "epoch": 0.24743525377677988, |
| "grad_norm": 0.00035546591971069574, |
| "learning_rate": 2.888744373568218e-05, |
| "loss": 0.0, |
| "step": 155000 |
| }, |
| { |
| "epoch": 0.2482334320147695, |
| "grad_norm": 0.0004987181746400893, |
| "learning_rate": 2.8880322168373987e-05, |
| "loss": 0.0, |
| "step": 155500 |
| }, |
| { |
| "epoch": 0.2490316102527591, |
| "grad_norm": 0.00044386033550836146, |
| "learning_rate": 2.887317876439553e-05, |
| "loss": 0.0, |
| "step": 156000 |
| }, |
| { |
| "epoch": 0.24982978849074872, |
| "grad_norm": 7.740879535675049, |
| "learning_rate": 2.886601353498489e-05, |
| "loss": 0.0, |
| "step": 156500 |
| }, |
| { |
| "epoch": 0.2506279667287383, |
| "grad_norm": 0.0005541268619708717, |
| "learning_rate": 2.8858826491414486e-05, |
| "loss": 0.0, |
| "step": 157000 |
| }, |
| { |
| "epoch": 0.25142614496672794, |
| "grad_norm": 0.0003068426449317485, |
| "learning_rate": 2.885161764499105e-05, |
| "loss": 0.0, |
| "step": 157500 |
| }, |
| { |
| "epoch": 0.25222432320471755, |
| "grad_norm": 0.0004327596980147064, |
| "learning_rate": 2.8844387007055617e-05, |
| "loss": 0.0, |
| "step": 158000 |
| }, |
| { |
| "epoch": 0.25302250144270716, |
| "grad_norm": 0.0004614158533513546, |
| "learning_rate": 2.88371345889835e-05, |
| "loss": 0.0, |
| "step": 158500 |
| }, |
| { |
| "epoch": 0.25382067968069677, |
| "grad_norm": 0.007937498390674591, |
| "learning_rate": 2.8829860402184278e-05, |
| "loss": 0.0, |
| "step": 159000 |
| }, |
| { |
| "epoch": 0.2546188579186864, |
| "grad_norm": 0.00025344284949824214, |
| "learning_rate": 2.882256445810179e-05, |
| "loss": 0.0, |
| "step": 159500 |
| }, |
| { |
| "epoch": 0.255417036156676, |
| "grad_norm": 0.00045202774344943464, |
| "learning_rate": 2.881524676821408e-05, |
| "loss": 0.0, |
| "step": 160000 |
| }, |
| { |
| "epoch": 0.2562152143946656, |
| "grad_norm": 0.0005293320282362401, |
| "learning_rate": 2.880790734403342e-05, |
| "loss": 0.0, |
| "step": 160500 |
| }, |
| { |
| "epoch": 0.2570133926326552, |
| "grad_norm": 6070.3427734375, |
| "learning_rate": 2.8800546197106277e-05, |
| "loss": 0.0, |
| "step": 161000 |
| }, |
| { |
| "epoch": 0.2578115708706448, |
| "grad_norm": 0.0039901817217469215, |
| "learning_rate": 2.8793163339013275e-05, |
| "loss": 0.0, |
| "step": 161500 |
| }, |
| { |
| "epoch": 0.25860974910863443, |
| "grad_norm": 0.000935662304982543, |
| "learning_rate": 2.878575878136921e-05, |
| "loss": 0.0, |
| "step": 162000 |
| }, |
| { |
| "epoch": 0.25940792734662405, |
| "grad_norm": 0.00384966260753572, |
| "learning_rate": 2.8778332535823013e-05, |
| "loss": 0.0, |
| "step": 162500 |
| }, |
| { |
| "epoch": 0.26020610558461366, |
| "grad_norm": 0.000354414718458429, |
| "learning_rate": 2.8770884614057727e-05, |
| "loss": 0.0, |
| "step": 163000 |
| }, |
| { |
| "epoch": 0.26100428382260327, |
| "grad_norm": 0.0002711515699047595, |
| "learning_rate": 2.87634150277905e-05, |
| "loss": 0.0, |
| "step": 163500 |
| }, |
| { |
| "epoch": 0.2618024620605929, |
| "grad_norm": 0.0004056449397467077, |
| "learning_rate": 2.8755923788772574e-05, |
| "loss": 0.0, |
| "step": 164000 |
| }, |
| { |
| "epoch": 0.2626006402985825, |
| "grad_norm": 0.022129971534013748, |
| "learning_rate": 2.874841090878924e-05, |
| "loss": 0.0, |
| "step": 164500 |
| }, |
| { |
| "epoch": 0.26339881853657215, |
| "grad_norm": 0.00036545773036777973, |
| "learning_rate": 2.8740876399659837e-05, |
| "loss": 0.0, |
| "step": 165000 |
| }, |
| { |
| "epoch": 0.26419699677456177, |
| "grad_norm": 0.003474722383543849, |
| "learning_rate": 2.8733320273237744e-05, |
| "loss": 0.0, |
| "step": 165500 |
| }, |
| { |
| "epoch": 0.2649951750125514, |
| "grad_norm": 0.005221190862357616, |
| "learning_rate": 2.8725742541410327e-05, |
| "loss": 0.0, |
| "step": 166000 |
| }, |
| { |
| "epoch": 0.265793353250541, |
| "grad_norm": 0.0006509709637612104, |
| "learning_rate": 2.871814321609897e-05, |
| "loss": 0.0, |
| "step": 166500 |
| }, |
| { |
| "epoch": 0.2665915314885306, |
| "grad_norm": 0.00034523566137067974, |
| "learning_rate": 2.8710522309258996e-05, |
| "loss": 0.0, |
| "step": 167000 |
| }, |
| { |
| "epoch": 0.2673897097265202, |
| "grad_norm": 0.007011398207396269, |
| "learning_rate": 2.870287983287971e-05, |
| "loss": 0.0, |
| "step": 167500 |
| }, |
| { |
| "epoch": 0.2681878879645098, |
| "grad_norm": 0.0003220826911274344, |
| "learning_rate": 2.8695215798984326e-05, |
| "loss": 0.0, |
| "step": 168000 |
| }, |
| { |
| "epoch": 0.26898606620249943, |
| "grad_norm": 0.00026051796157844365, |
| "learning_rate": 2.8687530219629986e-05, |
| "loss": 0.0, |
| "step": 168500 |
| }, |
| { |
| "epoch": 0.26978424444048904, |
| "grad_norm": 0.005468637682497501, |
| "learning_rate": 2.8679823106907734e-05, |
| "loss": 0.0, |
| "step": 169000 |
| }, |
| { |
| "epoch": 0.27058242267847865, |
| "grad_norm": 0.0006140482728369534, |
| "learning_rate": 2.8672094472942476e-05, |
| "loss": 0.0, |
| "step": 169500 |
| }, |
| { |
| "epoch": 0.27138060091646826, |
| "grad_norm": 0.000635271891951561, |
| "learning_rate": 2.8664344329892976e-05, |
| "loss": 0.0, |
| "step": 170000 |
| }, |
| { |
| "epoch": 0.2721787791544579, |
| "grad_norm": 0.0008043874986469746, |
| "learning_rate": 2.8656572689951845e-05, |
| "loss": 0.0, |
| "step": 170500 |
| }, |
| { |
| "epoch": 0.2729769573924475, |
| "grad_norm": 3775.17041015625, |
| "learning_rate": 2.8648779565345512e-05, |
| "loss": 0.0, |
| "step": 171000 |
| }, |
| { |
| "epoch": 0.2737751356304371, |
| "grad_norm": 0.0005806323024444282, |
| "learning_rate": 2.8640964968334205e-05, |
| "loss": 0.0, |
| "step": 171500 |
| }, |
| { |
| "epoch": 0.2745733138684267, |
| "grad_norm": 0.0011638767318800092, |
| "learning_rate": 2.8633128911211924e-05, |
| "loss": 0.0, |
| "step": 172000 |
| }, |
| { |
| "epoch": 0.2753714921064163, |
| "grad_norm": 0.0003357531677465886, |
| "learning_rate": 2.862527140630644e-05, |
| "loss": 0.0, |
| "step": 172500 |
| }, |
| { |
| "epoch": 0.2761696703444059, |
| "grad_norm": 0.020955944433808327, |
| "learning_rate": 2.8617392465979268e-05, |
| "loss": 0.0, |
| "step": 173000 |
| }, |
| { |
| "epoch": 0.27696784858239554, |
| "grad_norm": 0.00024525824119336903, |
| "learning_rate": 2.8609492102625634e-05, |
| "loss": 0.0, |
| "step": 173500 |
| }, |
| { |
| "epoch": 0.27776602682038515, |
| "grad_norm": 0.0786820575594902, |
| "learning_rate": 2.8601570328674474e-05, |
| "loss": 0.0, |
| "step": 174000 |
| }, |
| { |
| "epoch": 0.27856420505837476, |
| "grad_norm": 0.0002496826637070626, |
| "learning_rate": 2.859362715658841e-05, |
| "loss": 0.0, |
| "step": 174500 |
| }, |
| { |
| "epoch": 0.27936238329636437, |
| "grad_norm": 0.00032802074565552175, |
| "learning_rate": 2.8585662598863728e-05, |
| "loss": 0.0, |
| "step": 175000 |
| }, |
| { |
| "epoch": 0.280160561534354, |
| "grad_norm": 0.0019494870211929083, |
| "learning_rate": 2.8577676668030345e-05, |
| "loss": 0.0, |
| "step": 175500 |
| }, |
| { |
| "epoch": 0.2809587397723436, |
| "grad_norm": 0.0005683369236066937, |
| "learning_rate": 2.856966937665182e-05, |
| "loss": 0.0, |
| "step": 176000 |
| }, |
| { |
| "epoch": 0.2817569180103332, |
| "grad_norm": 0.000460715004010126, |
| "learning_rate": 2.8561640737325308e-05, |
| "loss": 0.0, |
| "step": 176500 |
| }, |
| { |
| "epoch": 0.2825550962483228, |
| "grad_norm": 0.00047412581625394523, |
| "learning_rate": 2.8553590762681547e-05, |
| "loss": 0.0, |
| "step": 177000 |
| }, |
| { |
| "epoch": 0.2833532744863124, |
| "grad_norm": 0.00045982238953001797, |
| "learning_rate": 2.854551946538485e-05, |
| "loss": 0.0, |
| "step": 177500 |
| }, |
| { |
| "epoch": 0.28415145272430203, |
| "grad_norm": 0.44650372862815857, |
| "learning_rate": 2.8537426858133053e-05, |
| "loss": 0.0, |
| "step": 178000 |
| }, |
| { |
| "epoch": 0.28494963096229164, |
| "grad_norm": 0.0004917326150462031, |
| "learning_rate": 2.852931295365754e-05, |
| "loss": 0.0, |
| "step": 178500 |
| }, |
| { |
| "epoch": 0.28574780920028126, |
| "grad_norm": 0.0005965101881884038, |
| "learning_rate": 2.85211777647232e-05, |
| "loss": 0.0, |
| "step": 179000 |
| }, |
| { |
| "epoch": 0.28654598743827087, |
| "grad_norm": 0.004035876132547855, |
| "learning_rate": 2.8513021304128383e-05, |
| "loss": 0.0, |
| "step": 179500 |
| }, |
| { |
| "epoch": 0.2873441656762605, |
| "grad_norm": 0.0002739470510277897, |
| "learning_rate": 2.850484358470493e-05, |
| "loss": 0.0, |
| "step": 180000 |
| }, |
| { |
| "epoch": 0.28814234391425014, |
| "grad_norm": 7.400282859802246, |
| "learning_rate": 2.8496644619318112e-05, |
| "loss": 0.0, |
| "step": 180500 |
| }, |
| { |
| "epoch": 0.28894052215223975, |
| "grad_norm": 0.008809339255094528, |
| "learning_rate": 2.848842442086663e-05, |
| "loss": 0.0, |
| "step": 181000 |
| }, |
| { |
| "epoch": 0.28973870039022936, |
| "grad_norm": 0.005798212252557278, |
| "learning_rate": 2.848018300228259e-05, |
| "loss": 0.0, |
| "step": 181500 |
| }, |
| { |
| "epoch": 0.290536878628219, |
| "grad_norm": 506.3049011230469, |
| "learning_rate": 2.847192037653147e-05, |
| "loss": 0.0, |
| "step": 182000 |
| }, |
| { |
| "epoch": 0.2913350568662086, |
| "grad_norm": 1442.7161865234375, |
| "learning_rate": 2.846363655661213e-05, |
| "loss": 0.0, |
| "step": 182500 |
| }, |
| { |
| "epoch": 0.2921332351041982, |
| "grad_norm": 0.0002742527285590768, |
| "learning_rate": 2.845533155555676e-05, |
| "loss": 0.0, |
| "step": 183000 |
| }, |
| { |
| "epoch": 0.2929314133421878, |
| "grad_norm": 0.053963255137205124, |
| "learning_rate": 2.844700538643088e-05, |
| "loss": 0.0, |
| "step": 183500 |
| }, |
| { |
| "epoch": 0.2937295915801774, |
| "grad_norm": 0.00021619696053676307, |
| "learning_rate": 2.8438658062333298e-05, |
| "loss": 0.0, |
| "step": 184000 |
| }, |
| { |
| "epoch": 0.29452776981816703, |
| "grad_norm": 0.0017313063144683838, |
| "learning_rate": 2.843028959639612e-05, |
| "loss": 0.0, |
| "step": 184500 |
| }, |
| { |
| "epoch": 0.29532594805615664, |
| "grad_norm": 0.004092790186405182, |
| "learning_rate": 2.8421900001784705e-05, |
| "loss": 0.0, |
| "step": 185000 |
| }, |
| { |
| "epoch": 0.29612412629414625, |
| "grad_norm": 0.0004076190816704184, |
| "learning_rate": 2.8413489291697654e-05, |
| "loss": 0.0, |
| "step": 185500 |
| }, |
| { |
| "epoch": 0.29692230453213586, |
| "grad_norm": 0.00018880168499890715, |
| "learning_rate": 2.8405057479366783e-05, |
| "loss": 0.0, |
| "step": 186000 |
| }, |
| { |
| "epoch": 0.29772048277012547, |
| "grad_norm": 0.0006835302338004112, |
| "learning_rate": 2.8396604578057106e-05, |
| "loss": 0.0, |
| "step": 186500 |
| }, |
| { |
| "epoch": 0.2985186610081151, |
| "grad_norm": 0.000788278179243207, |
| "learning_rate": 2.838813060106682e-05, |
| "loss": 0.0, |
| "step": 187000 |
| }, |
| { |
| "epoch": 0.2993168392461047, |
| "grad_norm": 0.0005574611132033169, |
| "learning_rate": 2.837963556172728e-05, |
| "loss": 0.0, |
| "step": 187500 |
| }, |
| { |
| "epoch": 0.3001150174840943, |
| "grad_norm": 0.00031035192660056055, |
| "learning_rate": 2.8371119473402962e-05, |
| "loss": 0.0, |
| "step": 188000 |
| }, |
| { |
| "epoch": 0.3009131957220839, |
| "grad_norm": 0.0002816423657350242, |
| "learning_rate": 2.8362582349491475e-05, |
| "loss": 0.0, |
| "step": 188500 |
| }, |
| { |
| "epoch": 0.3017113739600735, |
| "grad_norm": 0.0002125926548615098, |
| "learning_rate": 2.8354024203423506e-05, |
| "loss": 0.0, |
| "step": 189000 |
| }, |
| { |
| "epoch": 0.30250955219806314, |
| "grad_norm": 0.001808375702239573, |
| "learning_rate": 2.8345445048662833e-05, |
| "loss": 0.0, |
| "step": 189500 |
| }, |
| { |
| "epoch": 0.30330773043605275, |
| "grad_norm": 2678.749267578125, |
| "learning_rate": 2.8336844898706263e-05, |
| "loss": 0.0, |
| "step": 190000 |
| }, |
| { |
| "epoch": 0.30410590867404236, |
| "grad_norm": 0.00032715877750888467, |
| "learning_rate": 2.8328223767083646e-05, |
| "loss": 0.0, |
| "step": 190500 |
| }, |
| { |
| "epoch": 0.30490408691203197, |
| "grad_norm": 0.0005066508892923594, |
| "learning_rate": 2.8319581667357835e-05, |
| "loss": 0.0, |
| "step": 191000 |
| }, |
| { |
| "epoch": 0.3057022651500216, |
| "grad_norm": 0.00035031078732572496, |
| "learning_rate": 2.831091861312468e-05, |
| "loss": 0.0, |
| "step": 191500 |
| }, |
| { |
| "epoch": 0.3065004433880112, |
| "grad_norm": 2.2488694190979004, |
| "learning_rate": 2.8302234618012987e-05, |
| "loss": 0.0, |
| "step": 192000 |
| }, |
| { |
| "epoch": 0.3072986216260008, |
| "grad_norm": 0.00026478810468688607, |
| "learning_rate": 2.8293529695684503e-05, |
| "loss": 0.0, |
| "step": 192500 |
| }, |
| { |
| "epoch": 0.3080967998639904, |
| "grad_norm": 0.0003047047066502273, |
| "learning_rate": 2.8284803859833914e-05, |
| "loss": 0.0, |
| "step": 193000 |
| }, |
| { |
| "epoch": 0.30889497810198, |
| "grad_norm": 0.00020288255473133177, |
| "learning_rate": 2.827605712418879e-05, |
| "loss": 0.0, |
| "step": 193500 |
| }, |
| { |
| "epoch": 0.30969315633996963, |
| "grad_norm": 0.0002386285923421383, |
| "learning_rate": 2.8267289502509593e-05, |
| "loss": 0.0, |
| "step": 194000 |
| }, |
| { |
| "epoch": 0.31049133457795924, |
| "grad_norm": 0.007014371454715729, |
| "learning_rate": 2.8258501008589643e-05, |
| "loss": 0.0, |
| "step": 194500 |
| }, |
| { |
| "epoch": 0.31128951281594885, |
| "grad_norm": 0.0012399045517668128, |
| "learning_rate": 2.8249691656255076e-05, |
| "loss": 0.0, |
| "step": 195000 |
| }, |
| { |
| "epoch": 0.31208769105393847, |
| "grad_norm": 0.006949532311409712, |
| "learning_rate": 2.8240861459364876e-05, |
| "loss": 0.0, |
| "step": 195500 |
| }, |
| { |
| "epoch": 0.3128858692919281, |
| "grad_norm": 0.011967489495873451, |
| "learning_rate": 2.823201043181079e-05, |
| "loss": 0.0, |
| "step": 196000 |
| }, |
| { |
| "epoch": 0.31368404752991774, |
| "grad_norm": 0.00042293136357329786, |
| "learning_rate": 2.8223138587517358e-05, |
| "loss": 0.0, |
| "step": 196500 |
| }, |
| { |
| "epoch": 0.31448222576790735, |
| "grad_norm": 0.00022384982730727643, |
| "learning_rate": 2.8214245940441855e-05, |
| "loss": 0.0, |
| "step": 197000 |
| }, |
| { |
| "epoch": 0.31528040400589696, |
| "grad_norm": 0.00030645483639091253, |
| "learning_rate": 2.820533250457429e-05, |
| "loss": 0.0, |
| "step": 197500 |
| }, |
| { |
| "epoch": 0.3160785822438866, |
| "grad_norm": 0.18542060256004333, |
| "learning_rate": 2.819639829393737e-05, |
| "loss": 0.0, |
| "step": 198000 |
| }, |
| { |
| "epoch": 0.3168767604818762, |
| "grad_norm": 0.013315930962562561, |
| "learning_rate": 2.81874433225865e-05, |
| "loss": 0.0, |
| "step": 198500 |
| }, |
| { |
| "epoch": 0.3176749387198658, |
| "grad_norm": 0.0004632196214515716, |
| "learning_rate": 2.817846760460972e-05, |
| "loss": 0.0, |
| "step": 199000 |
| }, |
| { |
| "epoch": 0.3184731169578554, |
| "grad_norm": 0.0004162040422670543, |
| "learning_rate": 2.816947115412774e-05, |
| "loss": 0.0, |
| "step": 199500 |
| }, |
| { |
| "epoch": 0.319271295195845, |
| "grad_norm": 0.008228735998272896, |
| "learning_rate": 2.8160453985293868e-05, |
| "loss": 0.0, |
| "step": 200000 |
| }, |
| { |
| "epoch": 0.32006947343383463, |
| "grad_norm": 0.002274709288030863, |
| "learning_rate": 2.8151416112294007e-05, |
| "loss": 0.0, |
| "step": 200500 |
| }, |
| { |
| "epoch": 0.32086765167182424, |
| "grad_norm": 0.006481232587248087, |
| "learning_rate": 2.8142357549346632e-05, |
| "loss": 0.0, |
| "step": 201000 |
| }, |
| { |
| "epoch": 0.32166582990981385, |
| "grad_norm": 0.0003591532295104116, |
| "learning_rate": 2.8133278310702778e-05, |
| "loss": 0.0, |
| "step": 201500 |
| }, |
| { |
| "epoch": 0.32246400814780346, |
| "grad_norm": 0.000833726953715086, |
| "learning_rate": 2.812417841064599e-05, |
| "loss": 0.0, |
| "step": 202000 |
| }, |
| { |
| "epoch": 0.32326218638579307, |
| "grad_norm": 0.008296859450638294, |
| "learning_rate": 2.8115057863492336e-05, |
| "loss": 0.0, |
| "step": 202500 |
| }, |
| { |
| "epoch": 0.3240603646237827, |
| "grad_norm": 0.0004818796587642282, |
| "learning_rate": 2.8105916683590356e-05, |
| "loss": 0.0, |
| "step": 203000 |
| }, |
| { |
| "epoch": 0.3248585428617723, |
| "grad_norm": 0.0006113914423622191, |
| "learning_rate": 2.8096754885321048e-05, |
| "loss": 0.0, |
| "step": 203500 |
| }, |
| { |
| "epoch": 0.3256567210997619, |
| "grad_norm": 0.001762945088557899, |
| "learning_rate": 2.808757248309785e-05, |
| "loss": 0.0, |
| "step": 204000 |
| }, |
| { |
| "epoch": 0.3264548993377515, |
| "grad_norm": 0.0005597475683316588, |
| "learning_rate": 2.8078369491366622e-05, |
| "loss": 0.0, |
| "step": 204500 |
| }, |
| { |
| "epoch": 0.3272530775757411, |
| "grad_norm": 0.0004654059885069728, |
| "learning_rate": 2.80691459246056e-05, |
| "loss": 0.0, |
| "step": 205000 |
| }, |
| { |
| "epoch": 0.32805125581373074, |
| "grad_norm": 0.01907452754676342, |
| "learning_rate": 2.8059901797325403e-05, |
| "loss": 0.0, |
| "step": 205500 |
| }, |
| { |
| "epoch": 0.32884943405172035, |
| "grad_norm": 0.0023259760346263647, |
| "learning_rate": 2.8050637124068985e-05, |
| "loss": 0.0, |
| "step": 206000 |
| }, |
| { |
| "epoch": 0.32964761228970996, |
| "grad_norm": 0.00024120333546306938, |
| "learning_rate": 2.8041351919411633e-05, |
| "loss": 0.0, |
| "step": 206500 |
| }, |
| { |
| "epoch": 0.33044579052769957, |
| "grad_norm": 0.00039336824556812644, |
| "learning_rate": 2.803204619796093e-05, |
| "loss": 0.0, |
| "step": 207000 |
| }, |
| { |
| "epoch": 0.3312439687656892, |
| "grad_norm": 0.00048360280925408006, |
| "learning_rate": 2.8022719974356725e-05, |
| "loss": 0.0, |
| "step": 207500 |
| }, |
| { |
| "epoch": 0.3320421470036788, |
| "grad_norm": 0.00030716744367964566, |
| "learning_rate": 2.8013373263271147e-05, |
| "loss": 0.0, |
| "step": 208000 |
| }, |
| { |
| "epoch": 0.3328403252416684, |
| "grad_norm": 0.0003900101291947067, |
| "learning_rate": 2.8004006079408534e-05, |
| "loss": 0.0, |
| "step": 208500 |
| }, |
| { |
| "epoch": 0.333638503479658, |
| "grad_norm": 0.0009112692205235362, |
| "learning_rate": 2.799461843750544e-05, |
| "loss": 0.0, |
| "step": 209000 |
| }, |
| { |
| "epoch": 0.3344366817176476, |
| "grad_norm": 0.002314511453732848, |
| "learning_rate": 2.7985210352330603e-05, |
| "loss": 0.0, |
| "step": 209500 |
| }, |
| { |
| "epoch": 0.33523485995563723, |
| "grad_norm": 0.0004494874447118491, |
| "learning_rate": 2.7975781838684925e-05, |
| "loss": 0.0, |
| "step": 210000 |
| }, |
| { |
| "epoch": 0.33603303819362684, |
| "grad_norm": 0.000736879650503397, |
| "learning_rate": 2.7966332911401435e-05, |
| "loss": 0.0, |
| "step": 210500 |
| }, |
| { |
| "epoch": 0.33683121643161645, |
| "grad_norm": 0.0036531416699290276, |
| "learning_rate": 2.7956863585345295e-05, |
| "loss": 0.0, |
| "step": 211000 |
| }, |
| { |
| "epoch": 0.33762939466960606, |
| "grad_norm": 0.00030530127696692944, |
| "learning_rate": 2.7947373875413744e-05, |
| "loss": 0.0, |
| "step": 211500 |
| }, |
| { |
| "epoch": 0.33842757290759573, |
| "grad_norm": 0.0014557373942807317, |
| "learning_rate": 2.79378637965361e-05, |
| "loss": 0.0, |
| "step": 212000 |
| }, |
| { |
| "epoch": 0.33922575114558534, |
| "grad_norm": 0.0008803669479675591, |
| "learning_rate": 2.7928333363673716e-05, |
| "loss": 0.0, |
| "step": 212500 |
| }, |
| { |
| "epoch": 0.34002392938357495, |
| "grad_norm": 0.00021209794795140624, |
| "learning_rate": 2.791878259181997e-05, |
| "loss": 0.0, |
| "step": 213000 |
| }, |
| { |
| "epoch": 0.34082210762156456, |
| "grad_norm": 0.0012858508853241801, |
| "learning_rate": 2.7909211496000238e-05, |
| "loss": 0.0, |
| "step": 213500 |
| }, |
| { |
| "epoch": 0.3416202858595542, |
| "grad_norm": 0.0002028387680184096, |
| "learning_rate": 2.7899620091271874e-05, |
| "loss": 0.0, |
| "step": 214000 |
| }, |
| { |
| "epoch": 0.3424184640975438, |
| "grad_norm": 0.000373926421161741, |
| "learning_rate": 2.789000839272417e-05, |
| "loss": 0.0, |
| "step": 214500 |
| }, |
| { |
| "epoch": 0.3432166423355334, |
| "grad_norm": 0.000344914966262877, |
| "learning_rate": 2.7880376415478354e-05, |
| "loss": 0.0, |
| "step": 215000 |
| }, |
| { |
| "epoch": 0.344014820573523, |
| "grad_norm": 0.0009230823488906026, |
| "learning_rate": 2.7870724174687565e-05, |
| "loss": 0.0, |
| "step": 215500 |
| }, |
| { |
| "epoch": 0.3448129988115126, |
| "grad_norm": 0.0006525011267513037, |
| "learning_rate": 2.7861051685536798e-05, |
| "loss": 0.0, |
| "step": 216000 |
| }, |
| { |
| "epoch": 0.3456111770495022, |
| "grad_norm": 0.0021701750811189413, |
| "learning_rate": 2.785135896324292e-05, |
| "loss": 0.0, |
| "step": 216500 |
| }, |
| { |
| "epoch": 0.34640935528749184, |
| "grad_norm": 0.0005550780915655196, |
| "learning_rate": 2.7841646023054628e-05, |
| "loss": 0.0, |
| "step": 217000 |
| }, |
| { |
| "epoch": 0.34720753352548145, |
| "grad_norm": 0.1462016999721527, |
| "learning_rate": 2.7831912880252417e-05, |
| "loss": 0.0, |
| "step": 217500 |
| }, |
| { |
| "epoch": 0.34800571176347106, |
| "grad_norm": 0.00038006107206456363, |
| "learning_rate": 2.7822159550148574e-05, |
| "loss": 0.0, |
| "step": 218000 |
| }, |
| { |
| "epoch": 0.34880389000146067, |
| "grad_norm": 0.0005914014764130116, |
| "learning_rate": 2.7812386048087145e-05, |
| "loss": 0.0, |
| "step": 218500 |
| }, |
| { |
| "epoch": 0.3496020682394503, |
| "grad_norm": 0.001984767848625779, |
| "learning_rate": 2.78025923894439e-05, |
| "loss": 0.0, |
| "step": 219000 |
| }, |
| { |
| "epoch": 0.3504002464774399, |
| "grad_norm": 0.001104337745346129, |
| "learning_rate": 2.779277858962633e-05, |
| "loss": 0.0, |
| "step": 219500 |
| }, |
| { |
| "epoch": 0.3511984247154295, |
| "grad_norm": 0.0004109264409635216, |
| "learning_rate": 2.7782944664073612e-05, |
| "loss": 0.0, |
| "step": 220000 |
| }, |
| { |
| "epoch": 0.3519966029534191, |
| "grad_norm": 0.0002343226078664884, |
| "learning_rate": 2.7773090628256574e-05, |
| "loss": 0.0, |
| "step": 220500 |
| }, |
| { |
| "epoch": 0.3527947811914087, |
| "grad_norm": 0.00041432067519053817, |
| "learning_rate": 2.77632164976777e-05, |
| "loss": 0.0, |
| "step": 221000 |
| }, |
| { |
| "epoch": 0.35359295942939833, |
| "grad_norm": 0.008808308281004429, |
| "learning_rate": 2.7753322287871073e-05, |
| "loss": 0.0, |
| "step": 221500 |
| }, |
| { |
| "epoch": 0.35439113766738795, |
| "grad_norm": 0.0015466894255951047, |
| "learning_rate": 2.774340801440236e-05, |
| "loss": 0.0, |
| "step": 222000 |
| }, |
| { |
| "epoch": 0.35518931590537756, |
| "grad_norm": 0.00031447981018573046, |
| "learning_rate": 2.773347369286882e-05, |
| "loss": 0.0, |
| "step": 222500 |
| }, |
| { |
| "epoch": 0.35598749414336717, |
| "grad_norm": 0.0005213140393607318, |
| "learning_rate": 2.7723519338899216e-05, |
| "loss": 0.0, |
| "step": 223000 |
| }, |
| { |
| "epoch": 0.3567856723813568, |
| "grad_norm": 0.0002478585811331868, |
| "learning_rate": 2.7713544968153853e-05, |
| "loss": 0.0, |
| "step": 223500 |
| }, |
| { |
| "epoch": 0.3575838506193464, |
| "grad_norm": 0.0010923146037384868, |
| "learning_rate": 2.7703550596324514e-05, |
| "loss": 0.0, |
| "step": 224000 |
| }, |
| { |
| "epoch": 0.358382028857336, |
| "grad_norm": 0.0004491193685680628, |
| "learning_rate": 2.769353623913445e-05, |
| "loss": 0.0, |
| "step": 224500 |
| }, |
| { |
| "epoch": 0.3591802070953256, |
| "grad_norm": 0.0028391792438924313, |
| "learning_rate": 2.7683501912338354e-05, |
| "loss": 0.0, |
| "step": 225000 |
| }, |
| { |
| "epoch": 0.3599783853333152, |
| "grad_norm": 0.0011303217615932226, |
| "learning_rate": 2.767344763172234e-05, |
| "loss": 0.0, |
| "step": 225500 |
| }, |
| { |
| "epoch": 0.36077656357130483, |
| "grad_norm": 0.00829398538917303, |
| "learning_rate": 2.7663373413103904e-05, |
| "loss": 0.0, |
| "step": 226000 |
| }, |
| { |
| "epoch": 0.36157474180929444, |
| "grad_norm": 0.00026492562028579414, |
| "learning_rate": 2.7653279272331912e-05, |
| "loss": 0.0, |
| "step": 226500 |
| }, |
| { |
| "epoch": 0.36237292004728405, |
| "grad_norm": 0.00034646346466615796, |
| "learning_rate": 2.764316522528658e-05, |
| "loss": 0.0, |
| "step": 227000 |
| }, |
| { |
| "epoch": 0.3631710982852737, |
| "grad_norm": 0.004864424932748079, |
| "learning_rate": 2.7633031287879434e-05, |
| "loss": 0.0, |
| "step": 227500 |
| }, |
| { |
| "epoch": 0.36396927652326333, |
| "grad_norm": 0.00020658916037064046, |
| "learning_rate": 2.7622877476053285e-05, |
| "loss": 0.0, |
| "step": 228000 |
| }, |
| { |
| "epoch": 0.36476745476125294, |
| "grad_norm": 0.00030613088165409863, |
| "learning_rate": 2.7612703805782225e-05, |
| "loss": 0.0, |
| "step": 228500 |
| }, |
| { |
| "epoch": 0.36556563299924255, |
| "grad_norm": 0.00909386295825243, |
| "learning_rate": 2.760251029307157e-05, |
| "loss": 0.0, |
| "step": 229000 |
| }, |
| { |
| "epoch": 0.36636381123723216, |
| "grad_norm": 0.020065903663635254, |
| "learning_rate": 2.7592296953957876e-05, |
| "loss": 0.0, |
| "step": 229500 |
| }, |
| { |
| "epoch": 0.3671619894752218, |
| "grad_norm": 0.00014274036220740527, |
| "learning_rate": 2.7582063804508868e-05, |
| "loss": 0.0, |
| "step": 230000 |
| }, |
| { |
| "epoch": 0.3679601677132114, |
| "grad_norm": 0.00015419685223605484, |
| "learning_rate": 2.7571810860823443e-05, |
| "loss": 0.0, |
| "step": 230500 |
| }, |
| { |
| "epoch": 0.368758345951201, |
| "grad_norm": 0.00025692241615615785, |
| "learning_rate": 2.7561538139031653e-05, |
| "loss": 0.0, |
| "step": 231000 |
| }, |
| { |
| "epoch": 0.3695565241891906, |
| "grad_norm": 0.0001514313480583951, |
| "learning_rate": 2.7551245655294637e-05, |
| "loss": 0.0, |
| "step": 231500 |
| }, |
| { |
| "epoch": 0.3703547024271802, |
| "grad_norm": 0.0005323386285454035, |
| "learning_rate": 2.7540933425804655e-05, |
| "loss": 0.0, |
| "step": 232000 |
| }, |
| { |
| "epoch": 0.3711528806651698, |
| "grad_norm": 0.029986457899212837, |
| "learning_rate": 2.7530601466785003e-05, |
| "loss": 0.0, |
| "step": 232500 |
| }, |
| { |
| "epoch": 0.37195105890315944, |
| "grad_norm": 530.8088989257812, |
| "learning_rate": 2.752024979449004e-05, |
| "loss": 0.0, |
| "step": 233000 |
| }, |
| { |
| "epoch": 0.37274923714114905, |
| "grad_norm": 0.0007142575341276824, |
| "learning_rate": 2.7509878425205117e-05, |
| "loss": 0.0, |
| "step": 233500 |
| }, |
| { |
| "epoch": 0.37354741537913866, |
| "grad_norm": 0.00024393905187025666, |
| "learning_rate": 2.7499487375246588e-05, |
| "loss": 0.0, |
| "step": 234000 |
| }, |
| { |
| "epoch": 0.37434559361712827, |
| "grad_norm": 0.00046698853839188814, |
| "learning_rate": 2.7489076660961762e-05, |
| "loss": 0.0, |
| "step": 234500 |
| }, |
| { |
| "epoch": 0.3751437718551179, |
| "grad_norm": 3171.53173828125, |
| "learning_rate": 2.7478646298728884e-05, |
| "loss": 0.0, |
| "step": 235000 |
| }, |
| { |
| "epoch": 0.3759419500931075, |
| "grad_norm": 0.0002929008915089071, |
| "learning_rate": 2.7468196304957114e-05, |
| "loss": 0.0, |
| "step": 235500 |
| }, |
| { |
| "epoch": 0.3767401283310971, |
| "grad_norm": 0.00025504553923383355, |
| "learning_rate": 2.7457726696086486e-05, |
| "loss": 0.0, |
| "step": 236000 |
| }, |
| { |
| "epoch": 0.3775383065690867, |
| "grad_norm": 0.0002734732406679541, |
| "learning_rate": 2.744723748858791e-05, |
| "loss": 0.0, |
| "step": 236500 |
| }, |
| { |
| "epoch": 0.3783364848070763, |
| "grad_norm": 0.0002443444973323494, |
| "learning_rate": 2.7436728698963115e-05, |
| "loss": 0.0, |
| "step": 237000 |
| }, |
| { |
| "epoch": 0.37913466304506593, |
| "grad_norm": 0.00032405051751993597, |
| "learning_rate": 2.742620034374463e-05, |
| "loss": 0.0, |
| "step": 237500 |
| }, |
| { |
| "epoch": 0.37993284128305554, |
| "grad_norm": 0.0015245258109644055, |
| "learning_rate": 2.7415652439495792e-05, |
| "loss": 0.0, |
| "step": 238000 |
| }, |
| { |
| "epoch": 0.38073101952104516, |
| "grad_norm": 0.0013498624321073294, |
| "learning_rate": 2.7405085002810664e-05, |
| "loss": 0.0, |
| "step": 238500 |
| }, |
| { |
| "epoch": 0.38152919775903477, |
| "grad_norm": 0.00023179441632237285, |
| "learning_rate": 2.739449805031406e-05, |
| "loss": 0.0, |
| "step": 239000 |
| }, |
| { |
| "epoch": 0.3823273759970244, |
| "grad_norm": 0.0005060135736130178, |
| "learning_rate": 2.7383891598661473e-05, |
| "loss": 0.0, |
| "step": 239500 |
| }, |
| { |
| "epoch": 0.383125554235014, |
| "grad_norm": 0.001060970826074481, |
| "learning_rate": 2.7373265664539094e-05, |
| "loss": 0.0, |
| "step": 240000 |
| }, |
| { |
| "epoch": 0.3839237324730036, |
| "grad_norm": 0.0004396582953631878, |
| "learning_rate": 2.7362620264663755e-05, |
| "loss": 0.0, |
| "step": 240500 |
| }, |
| { |
| "epoch": 0.3847219107109932, |
| "grad_norm": 0.0010933999437838793, |
| "learning_rate": 2.735195541578291e-05, |
| "loss": 0.0, |
| "step": 241000 |
| }, |
| { |
| "epoch": 0.3855200889489828, |
| "grad_norm": 0.0002554966777097434, |
| "learning_rate": 2.7341271134674613e-05, |
| "loss": 0.0, |
| "step": 241500 |
| }, |
| { |
| "epoch": 0.38631826718697243, |
| "grad_norm": 0.0016024510841816664, |
| "learning_rate": 2.7330567438147493e-05, |
| "loss": 0.0, |
| "step": 242000 |
| }, |
| { |
| "epoch": 0.38711644542496204, |
| "grad_norm": 0.0025034185964614153, |
| "learning_rate": 2.7319844343040706e-05, |
| "loss": 0.0, |
| "step": 242500 |
| }, |
| { |
| "epoch": 0.38791462366295165, |
| "grad_norm": 0.00293533387593925, |
| "learning_rate": 2.7309101866223954e-05, |
| "loss": 0.0, |
| "step": 243000 |
| }, |
| { |
| "epoch": 0.3887128019009413, |
| "grad_norm": 0.0003197678888682276, |
| "learning_rate": 2.7298340024597412e-05, |
| "loss": 0.0, |
| "step": 243500 |
| }, |
| { |
| "epoch": 0.38951098013893093, |
| "grad_norm": 0.0006886592600494623, |
| "learning_rate": 2.7287558835091715e-05, |
| "loss": 0.0, |
| "step": 244000 |
| }, |
| { |
| "epoch": 0.39030915837692054, |
| "grad_norm": 0.0002249764947919175, |
| "learning_rate": 2.7276758314667954e-05, |
| "loss": 0.0, |
| "step": 244500 |
| }, |
| { |
| "epoch": 0.39110733661491015, |
| "grad_norm": 0.26254597306251526, |
| "learning_rate": 2.7265938480317622e-05, |
| "loss": 0.0, |
| "step": 245000 |
| }, |
| { |
| "epoch": 0.39190551485289976, |
| "grad_norm": 0.0028902171179652214, |
| "learning_rate": 2.7255099349062593e-05, |
| "loss": 0.0, |
| "step": 245500 |
| }, |
| { |
| "epoch": 0.39270369309088937, |
| "grad_norm": 0.00023200709256343544, |
| "learning_rate": 2.7244240937955106e-05, |
| "loss": 0.0, |
| "step": 246000 |
| }, |
| { |
| "epoch": 0.393501871328879, |
| "grad_norm": 0.0006069283117540181, |
| "learning_rate": 2.7233363264077725e-05, |
| "loss": 0.0, |
| "step": 246500 |
| }, |
| { |
| "epoch": 0.3943000495668686, |
| "grad_norm": 0.0007884249207563698, |
| "learning_rate": 2.722246634454333e-05, |
| "loss": 0.0, |
| "step": 247000 |
| }, |
| { |
| "epoch": 0.3950982278048582, |
| "grad_norm": 0.0011736562009900808, |
| "learning_rate": 2.7211550196495058e-05, |
| "loss": 0.0, |
| "step": 247500 |
| }, |
| { |
| "epoch": 0.3958964060428478, |
| "grad_norm": 0.0001821365876821801, |
| "learning_rate": 2.7200614837106324e-05, |
| "loss": 0.0, |
| "step": 248000 |
| }, |
| { |
| "epoch": 0.3966945842808374, |
| "grad_norm": 0.0002176285779569298, |
| "learning_rate": 2.7189660283580738e-05, |
| "loss": 0.0, |
| "step": 248500 |
| }, |
| { |
| "epoch": 0.39749276251882704, |
| "grad_norm": 0.0016040855553001165, |
| "learning_rate": 2.7178686553152128e-05, |
| "loss": 0.0, |
| "step": 249000 |
| }, |
| { |
| "epoch": 0.39829094075681665, |
| "grad_norm": 0.0006293723708949983, |
| "learning_rate": 2.7167693663084484e-05, |
| "loss": 0.0, |
| "step": 249500 |
| }, |
| { |
| "epoch": 0.39908911899480626, |
| "grad_norm": 0.000314537959638983, |
| "learning_rate": 2.7156681630671932e-05, |
| "loss": 0.0, |
| "step": 250000 |
| }, |
| { |
| "epoch": 0.39988729723279587, |
| "grad_norm": 0.00024686212418600917, |
| "learning_rate": 2.7145650473238724e-05, |
| "loss": 0.0, |
| "step": 250500 |
| }, |
| { |
| "epoch": 0.4006854754707855, |
| "grad_norm": 0.00021431001368910074, |
| "learning_rate": 2.713460020813919e-05, |
| "loss": 0.0, |
| "step": 251000 |
| }, |
| { |
| "epoch": 0.4014836537087751, |
| "grad_norm": 52.81241989135742, |
| "learning_rate": 2.7123530852757722e-05, |
| "loss": 0.0, |
| "step": 251500 |
| }, |
| { |
| "epoch": 0.4022818319467647, |
| "grad_norm": 0.00014636837295256555, |
| "learning_rate": 2.711244242450876e-05, |
| "loss": 0.0, |
| "step": 252000 |
| }, |
| { |
| "epoch": 0.4030800101847543, |
| "grad_norm": 0.0008935470250435174, |
| "learning_rate": 2.710133494083672e-05, |
| "loss": 0.0, |
| "step": 252500 |
| }, |
| { |
| "epoch": 0.4038781884227439, |
| "grad_norm": 0.00020280707394704223, |
| "learning_rate": 2.7090208419216022e-05, |
| "loss": 0.0, |
| "step": 253000 |
| }, |
| { |
| "epoch": 0.40467636666073353, |
| "grad_norm": 509.4928283691406, |
| "learning_rate": 2.707906287715103e-05, |
| "loss": 0.0, |
| "step": 253500 |
| }, |
| { |
| "epoch": 0.40547454489872314, |
| "grad_norm": 0.0008828208665363491, |
| "learning_rate": 2.7067898332176025e-05, |
| "loss": 0.0, |
| "step": 254000 |
| }, |
| { |
| "epoch": 0.40627272313671275, |
| "grad_norm": 0.00014347408432513475, |
| "learning_rate": 2.705671480185519e-05, |
| "loss": 0.0, |
| "step": 254500 |
| }, |
| { |
| "epoch": 0.40707090137470237, |
| "grad_norm": 0.0016916063614189625, |
| "learning_rate": 2.7045512303782576e-05, |
| "loss": 0.0, |
| "step": 255000 |
| }, |
| { |
| "epoch": 0.407869079612692, |
| "grad_norm": 0.00024378852685913444, |
| "learning_rate": 2.7034290855582063e-05, |
| "loss": 0.0, |
| "step": 255500 |
| }, |
| { |
| "epoch": 0.4086672578506816, |
| "grad_norm": 0.000262060813838616, |
| "learning_rate": 2.7023050474907364e-05, |
| "loss": 0.0, |
| "step": 256000 |
| }, |
| { |
| "epoch": 0.4094654360886712, |
| "grad_norm": 0.0003295161877758801, |
| "learning_rate": 2.7011791179441954e-05, |
| "loss": 0.0, |
| "step": 256500 |
| }, |
| { |
| "epoch": 0.4102636143266608, |
| "grad_norm": 0.0004501194052863866, |
| "learning_rate": 2.7000512986899083e-05, |
| "loss": 0.0, |
| "step": 257000 |
| }, |
| { |
| "epoch": 0.4110617925646504, |
| "grad_norm": 0.00023809456615708768, |
| "learning_rate": 2.6989215915021727e-05, |
| "loss": 0.0, |
| "step": 257500 |
| }, |
| { |
| "epoch": 0.41185997080264003, |
| "grad_norm": 0.000276111182756722, |
| "learning_rate": 2.697789998158255e-05, |
| "loss": 0.0, |
| "step": 258000 |
| }, |
| { |
| "epoch": 0.41265814904062964, |
| "grad_norm": 0.00017680577002465725, |
| "learning_rate": 2.6966565204383905e-05, |
| "loss": 0.0, |
| "step": 258500 |
| }, |
| { |
| "epoch": 0.4134563272786193, |
| "grad_norm": 0.00023531143961008638, |
| "learning_rate": 2.695521160125778e-05, |
| "loss": 0.0, |
| "step": 259000 |
| }, |
| { |
| "epoch": 0.4142545055166089, |
| "grad_norm": 0.000134236179292202, |
| "learning_rate": 2.694383919006579e-05, |
| "loss": 0.0, |
| "step": 259500 |
| }, |
| { |
| "epoch": 0.41505268375459853, |
| "grad_norm": 1656.9716796875, |
| "learning_rate": 2.6932447988699128e-05, |
| "loss": 0.0, |
| "step": 260000 |
| }, |
| { |
| "epoch": 0.41585086199258814, |
| "grad_norm": 0.0007973301107995212, |
| "learning_rate": 2.6921038015078554e-05, |
| "loss": 0.0, |
| "step": 260500 |
| }, |
| { |
| "epoch": 0.41664904023057775, |
| "grad_norm": 0.0007964337710291147, |
| "learning_rate": 2.690960928715436e-05, |
| "loss": 0.0001, |
| "step": 261000 |
| }, |
| { |
| "epoch": 0.41744721846856736, |
| "grad_norm": 0.0003351388149894774, |
| "learning_rate": 2.6898161822906345e-05, |
| "loss": 0.0, |
| "step": 261500 |
| }, |
| { |
| "epoch": 0.41824539670655697, |
| "grad_norm": 0.0008457360090687871, |
| "learning_rate": 2.6886695640343773e-05, |
| "loss": 0.0, |
| "step": 262000 |
| }, |
| { |
| "epoch": 0.4190435749445466, |
| "grad_norm": 0.00018712795281317085, |
| "learning_rate": 2.6875210757505373e-05, |
| "loss": 0.0, |
| "step": 262500 |
| }, |
| { |
| "epoch": 0.4198417531825362, |
| "grad_norm": 0.00024416804080829024, |
| "learning_rate": 2.686370719245928e-05, |
| "loss": 0.0, |
| "step": 263000 |
| }, |
| { |
| "epoch": 0.4206399314205258, |
| "grad_norm": 0.0003195313038304448, |
| "learning_rate": 2.685218496330303e-05, |
| "loss": 0.0, |
| "step": 263500 |
| }, |
| { |
| "epoch": 0.4214381096585154, |
| "grad_norm": 0.00019497050379868597, |
| "learning_rate": 2.6840644088163508e-05, |
| "loss": 0.0, |
| "step": 264000 |
| }, |
| { |
| "epoch": 0.422236287896505, |
| "grad_norm": 0.004070492926985025, |
| "learning_rate": 2.6829084585196943e-05, |
| "loss": 0.0, |
| "step": 264500 |
| }, |
| { |
| "epoch": 0.42303446613449464, |
| "grad_norm": 0.0002645227941684425, |
| "learning_rate": 2.6817506472588872e-05, |
| "loss": 0.0, |
| "step": 265000 |
| }, |
| { |
| "epoch": 0.42383264437248425, |
| "grad_norm": 0.00046545593068003654, |
| "learning_rate": 2.6805909768554106e-05, |
| "loss": 0.0, |
| "step": 265500 |
| }, |
| { |
| "epoch": 0.42463082261047386, |
| "grad_norm": 0.00025627054856158793, |
| "learning_rate": 2.6794294491336703e-05, |
| "loss": 0.0, |
| "step": 266000 |
| }, |
| { |
| "epoch": 0.42542900084846347, |
| "grad_norm": 0.0002409874286968261, |
| "learning_rate": 2.6782660659209935e-05, |
| "loss": 0.0, |
| "step": 266500 |
| }, |
| { |
| "epoch": 0.4262271790864531, |
| "grad_norm": 0.0008856968488544226, |
| "learning_rate": 2.6771008290476268e-05, |
| "loss": 0.0, |
| "step": 267000 |
| }, |
| { |
| "epoch": 0.4270253573244427, |
| "grad_norm": 0.0036024507135152817, |
| "learning_rate": 2.6759337403467344e-05, |
| "loss": 0.0, |
| "step": 267500 |
| }, |
| { |
| "epoch": 0.4278235355624323, |
| "grad_norm": 0.0002771168656181544, |
| "learning_rate": 2.6747648016543918e-05, |
| "loss": 0.0, |
| "step": 268000 |
| }, |
| { |
| "epoch": 0.4286217138004219, |
| "grad_norm": 2055.608642578125, |
| "learning_rate": 2.6735940148095856e-05, |
| "loss": 0.0, |
| "step": 268500 |
| }, |
| { |
| "epoch": 0.4294198920384115, |
| "grad_norm": 0.00016251685156021267, |
| "learning_rate": 2.6724213816542105e-05, |
| "loss": 0.0, |
| "step": 269000 |
| }, |
| { |
| "epoch": 0.43021807027640113, |
| "grad_norm": 0.0016744077438488603, |
| "learning_rate": 2.6712469040330658e-05, |
| "loss": 0.0, |
| "step": 269500 |
| }, |
| { |
| "epoch": 0.43101624851439074, |
| "grad_norm": 0.0008171962690539658, |
| "learning_rate": 2.670070583793851e-05, |
| "loss": 0.0, |
| "step": 270000 |
| }, |
| { |
| "epoch": 0.43181442675238035, |
| "grad_norm": 0.0004002843634225428, |
| "learning_rate": 2.6688924227871667e-05, |
| "loss": 0.0, |
| "step": 270500 |
| }, |
| { |
| "epoch": 0.43261260499036996, |
| "grad_norm": 0.0014142803847789764, |
| "learning_rate": 2.667712422866508e-05, |
| "loss": 0.0, |
| "step": 271000 |
| }, |
| { |
| "epoch": 0.4334107832283596, |
| "grad_norm": 0.0025327634066343307, |
| "learning_rate": 2.6665305858882637e-05, |
| "loss": 0.0, |
| "step": 271500 |
| }, |
| { |
| "epoch": 0.4342089614663492, |
| "grad_norm": 0.0027283141389489174, |
| "learning_rate": 2.665346913711711e-05, |
| "loss": 0.0, |
| "step": 272000 |
| }, |
| { |
| "epoch": 0.4350071397043388, |
| "grad_norm": 0.00230594165623188, |
| "learning_rate": 2.6641614081990168e-05, |
| "loss": 0.0, |
| "step": 272500 |
| }, |
| { |
| "epoch": 0.4358053179423284, |
| "grad_norm": 0.00019310094648972154, |
| "learning_rate": 2.6629740712152305e-05, |
| "loss": 0.0, |
| "step": 273000 |
| }, |
| { |
| "epoch": 0.436603496180318, |
| "grad_norm": 0.00035802560159936547, |
| "learning_rate": 2.661784904628283e-05, |
| "loss": 0.0, |
| "step": 273500 |
| }, |
| { |
| "epoch": 0.43740167441830763, |
| "grad_norm": 0.0004388946108520031, |
| "learning_rate": 2.6605939103089848e-05, |
| "loss": 0.0, |
| "step": 274000 |
| }, |
| { |
| "epoch": 0.43819985265629724, |
| "grad_norm": 0.0002641767496243119, |
| "learning_rate": 2.6594010901310196e-05, |
| "loss": 0.0, |
| "step": 274500 |
| }, |
| { |
| "epoch": 0.4389980308942869, |
| "grad_norm": 0.002404822502285242, |
| "learning_rate": 2.658206445970945e-05, |
| "loss": 0.0, |
| "step": 275000 |
| }, |
| { |
| "epoch": 0.4397962091322765, |
| "grad_norm": 0.011408819817006588, |
| "learning_rate": 2.6570099797081885e-05, |
| "loss": 0.0, |
| "step": 275500 |
| }, |
| { |
| "epoch": 0.4405943873702661, |
| "grad_norm": 7161.9951171875, |
| "learning_rate": 2.6558116932250428e-05, |
| "loss": 0.0, |
| "step": 276000 |
| }, |
| { |
| "epoch": 0.44139256560825574, |
| "grad_norm": 0.0002718472678679973, |
| "learning_rate": 2.654611588406666e-05, |
| "loss": 0.0, |
| "step": 276500 |
| }, |
| { |
| "epoch": 0.44219074384624535, |
| "grad_norm": 0.8729614019393921, |
| "learning_rate": 2.6534096671410745e-05, |
| "loss": 0.0, |
| "step": 277000 |
| }, |
| { |
| "epoch": 0.44298892208423496, |
| "grad_norm": 0.00037378541310317814, |
| "learning_rate": 2.652205931319144e-05, |
| "loss": 0.0, |
| "step": 277500 |
| }, |
| { |
| "epoch": 0.44378710032222457, |
| "grad_norm": 0.002113162772729993, |
| "learning_rate": 2.6510003828346052e-05, |
| "loss": 0.0, |
| "step": 278000 |
| }, |
| { |
| "epoch": 0.4445852785602142, |
| "grad_norm": 0.0005606426857411861, |
| "learning_rate": 2.649793023584039e-05, |
| "loss": 0.0, |
| "step": 278500 |
| }, |
| { |
| "epoch": 0.4453834567982038, |
| "grad_norm": 0.0006437928532250226, |
| "learning_rate": 2.6485838554668765e-05, |
| "loss": 0.0, |
| "step": 279000 |
| }, |
| { |
| "epoch": 0.4461816350361934, |
| "grad_norm": 0.0003039956500288099, |
| "learning_rate": 2.6473728803853925e-05, |
| "loss": 0.0, |
| "step": 279500 |
| }, |
| { |
| "epoch": 0.446979813274183, |
| "grad_norm": 0.01145413052290678, |
| "learning_rate": 2.646160100244707e-05, |
| "loss": 0.0, |
| "step": 280000 |
| }, |
| { |
| "epoch": 0.4477779915121726, |
| "grad_norm": 0.010469400323927402, |
| "learning_rate": 2.6449455169527788e-05, |
| "loss": 0.0, |
| "step": 280500 |
| }, |
| { |
| "epoch": 0.44857616975016223, |
| "grad_norm": 0.00035089420271106064, |
| "learning_rate": 2.643729132420402e-05, |
| "loss": 0.0, |
| "step": 281000 |
| }, |
| { |
| "epoch": 0.44937434798815185, |
| "grad_norm": 0.00039669257239438593, |
| "learning_rate": 2.6425109485612066e-05, |
| "loss": 0.0, |
| "step": 281500 |
| }, |
| { |
| "epoch": 0.45017252622614146, |
| "grad_norm": 0.00023531325859948993, |
| "learning_rate": 2.6412909672916523e-05, |
| "loss": 0.0, |
| "step": 282000 |
| }, |
| { |
| "epoch": 0.45097070446413107, |
| "grad_norm": 0.00033051602076739073, |
| "learning_rate": 2.6400691905310262e-05, |
| "loss": 0.0, |
| "step": 282500 |
| }, |
| { |
| "epoch": 0.4517688827021207, |
| "grad_norm": 0.00016060993948485702, |
| "learning_rate": 2.638845620201441e-05, |
| "loss": 0.0, |
| "step": 283000 |
| }, |
| { |
| "epoch": 0.4525670609401103, |
| "grad_norm": 0.11830911040306091, |
| "learning_rate": 2.6376202582278307e-05, |
| "loss": 0.0, |
| "step": 283500 |
| }, |
| { |
| "epoch": 0.4533652391780999, |
| "grad_norm": 0.0002819143410306424, |
| "learning_rate": 2.636393106537947e-05, |
| "loss": 0.0, |
| "step": 284000 |
| }, |
| { |
| "epoch": 0.4541634174160895, |
| "grad_norm": 0.00021263032977003604, |
| "learning_rate": 2.6351641670623583e-05, |
| "loss": 0.0, |
| "step": 284500 |
| }, |
| { |
| "epoch": 0.4549615956540791, |
| "grad_norm": 0.0021154058631509542, |
| "learning_rate": 2.633933441734445e-05, |
| "loss": 0.0, |
| "step": 285000 |
| }, |
| { |
| "epoch": 0.45575977389206873, |
| "grad_norm": 58.90241622924805, |
| "learning_rate": 2.6327009324903978e-05, |
| "loss": 0.0, |
| "step": 285500 |
| }, |
| { |
| "epoch": 0.45655795213005834, |
| "grad_norm": 0.003388627665117383, |
| "learning_rate": 2.631466641269213e-05, |
| "loss": 0.0, |
| "step": 286000 |
| }, |
| { |
| "epoch": 0.45735613036804795, |
| "grad_norm": 0.00012820272240787745, |
| "learning_rate": 2.6302305700126908e-05, |
| "loss": 0.0, |
| "step": 286500 |
| }, |
| { |
| "epoch": 0.45815430860603756, |
| "grad_norm": 0.000897783029358834, |
| "learning_rate": 2.6289927206654315e-05, |
| "loss": 0.0, |
| "step": 287000 |
| }, |
| { |
| "epoch": 0.4589524868440272, |
| "grad_norm": 0.04280461370944977, |
| "learning_rate": 2.627753095174833e-05, |
| "loss": 0.0, |
| "step": 287500 |
| }, |
| { |
| "epoch": 0.4597506650820168, |
| "grad_norm": 0.0004180770483799279, |
| "learning_rate": 2.6265116954910868e-05, |
| "loss": 0.0, |
| "step": 288000 |
| }, |
| { |
| "epoch": 0.4605488433200064, |
| "grad_norm": 0.00025683449348434806, |
| "learning_rate": 2.625268523567177e-05, |
| "loss": 0.0, |
| "step": 288500 |
| }, |
| { |
| "epoch": 0.461347021557996, |
| "grad_norm": 504.5489196777344, |
| "learning_rate": 2.6240235813588738e-05, |
| "loss": 0.0, |
| "step": 289000 |
| }, |
| { |
| "epoch": 0.4621451997959856, |
| "grad_norm": 0.0005199440638534725, |
| "learning_rate": 2.6227768708247343e-05, |
| "loss": 0.0, |
| "step": 289500 |
| }, |
| { |
| "epoch": 0.46294337803397523, |
| "grad_norm": 0.04056164249777794, |
| "learning_rate": 2.6215283939260964e-05, |
| "loss": 0.0, |
| "step": 290000 |
| }, |
| { |
| "epoch": 0.4637415562719649, |
| "grad_norm": 0.0003166797396261245, |
| "learning_rate": 2.6202781526270773e-05, |
| "loss": 0.0, |
| "step": 290500 |
| }, |
| { |
| "epoch": 0.4645397345099545, |
| "grad_norm": 0.008205456659197807, |
| "learning_rate": 2.61902614889457e-05, |
| "loss": 0.0, |
| "step": 291000 |
| }, |
| { |
| "epoch": 0.4653379127479441, |
| "grad_norm": 0.004406485706567764, |
| "learning_rate": 2.6177723846982398e-05, |
| "loss": 0.0, |
| "step": 291500 |
| }, |
| { |
| "epoch": 0.4661360909859337, |
| "grad_norm": 0.007951625622808933, |
| "learning_rate": 2.6165168620105222e-05, |
| "loss": 0.0, |
| "step": 292000 |
| }, |
| { |
| "epoch": 0.46693426922392334, |
| "grad_norm": 0.00030562348547391593, |
| "learning_rate": 2.6152595828066183e-05, |
| "loss": 0.0, |
| "step": 292500 |
| }, |
| { |
| "epoch": 0.46773244746191295, |
| "grad_norm": 0.0004488139820750803, |
| "learning_rate": 2.6140005490644937e-05, |
| "loss": 0.0, |
| "step": 293000 |
| }, |
| { |
| "epoch": 0.46853062569990256, |
| "grad_norm": 0.00018583855126053095, |
| "learning_rate": 2.6127397627648736e-05, |
| "loss": 0.0, |
| "step": 293500 |
| }, |
| { |
| "epoch": 0.46932880393789217, |
| "grad_norm": 0.1338287740945816, |
| "learning_rate": 2.6114772258912394e-05, |
| "loss": 0.0, |
| "step": 294000 |
| }, |
| { |
| "epoch": 0.4701269821758818, |
| "grad_norm": 3.4950578212738037, |
| "learning_rate": 2.610212940429829e-05, |
| "loss": 0.0, |
| "step": 294500 |
| }, |
| { |
| "epoch": 0.4709251604138714, |
| "grad_norm": 0.000191990053281188, |
| "learning_rate": 2.6089469083696288e-05, |
| "loss": 0.0, |
| "step": 295000 |
| }, |
| { |
| "epoch": 0.471723338651861, |
| "grad_norm": 0.00013626097643282264, |
| "learning_rate": 2.607679131702374e-05, |
| "loss": 0.0, |
| "step": 295500 |
| }, |
| { |
| "epoch": 0.4725215168898506, |
| "grad_norm": 0.0008477133233100176, |
| "learning_rate": 2.6064096124225448e-05, |
| "loss": 0.0, |
| "step": 296000 |
| }, |
| { |
| "epoch": 0.4733196951278402, |
| "grad_norm": 0.06711713969707489, |
| "learning_rate": 2.6051383525273614e-05, |
| "loss": 0.0, |
| "step": 296500 |
| }, |
| { |
| "epoch": 0.47411787336582983, |
| "grad_norm": 0.00024246216344181448, |
| "learning_rate": 2.6038653540167845e-05, |
| "loss": 0.0, |
| "step": 297000 |
| }, |
| { |
| "epoch": 0.47491605160381944, |
| "grad_norm": 0.0030509470961987972, |
| "learning_rate": 2.6025906188935084e-05, |
| "loss": 0.0, |
| "step": 297500 |
| }, |
| { |
| "epoch": 0.47571422984180906, |
| "grad_norm": 0.004496394656598568, |
| "learning_rate": 2.6013141491629597e-05, |
| "loss": 0.0, |
| "step": 298000 |
| }, |
| { |
| "epoch": 0.47651240807979867, |
| "grad_norm": 0.000243777220021002, |
| "learning_rate": 2.600035946833294e-05, |
| "loss": 0.0, |
| "step": 298500 |
| }, |
| { |
| "epoch": 0.4773105863177883, |
| "grad_norm": 0.06534085422754288, |
| "learning_rate": 2.5987560139153936e-05, |
| "loss": 0.0, |
| "step": 299000 |
| }, |
| { |
| "epoch": 0.4781087645557779, |
| "grad_norm": 0.001955215120688081, |
| "learning_rate": 2.5974743524228625e-05, |
| "loss": 0.0, |
| "step": 299500 |
| }, |
| { |
| "epoch": 0.4789069427937675, |
| "grad_norm": 0.01624520681798458, |
| "learning_rate": 2.596190964372023e-05, |
| "loss": 0.0, |
| "step": 300000 |
| }, |
| { |
| "epoch": 0.4789069427937675, |
| "eval_loss": 1.7790502170100808e-05, |
| "eval_runtime": 21821.523, |
| "eval_samples_per_second": 102.069, |
| "eval_steps_per_second": 3.19, |
| "step": 300000 |
| }, |
| { |
| "epoch": 0.4797051210317571, |
| "grad_norm": 0.5646551251411438, |
| "learning_rate": 2.5949058517819156e-05, |
| "loss": 0.0, |
| "step": 300500 |
| }, |
| { |
| "epoch": 0.4805032992697467, |
| "grad_norm": 0.7845109701156616, |
| "learning_rate": 2.5936190166742935e-05, |
| "loss": 0.0, |
| "step": 301000 |
| }, |
| { |
| "epoch": 0.48130147750773633, |
| "grad_norm": 0.0011155412066727877, |
| "learning_rate": 2.592330461073619e-05, |
| "loss": 0.0, |
| "step": 301500 |
| }, |
| { |
| "epoch": 0.48209965574572594, |
| "grad_norm": 119.20901489257812, |
| "learning_rate": 2.591040187007061e-05, |
| "loss": 0.0, |
| "step": 302000 |
| }, |
| { |
| "epoch": 0.48289783398371555, |
| "grad_norm": 0.0005831182352267206, |
| "learning_rate": 2.589748196504493e-05, |
| "loss": 0.0, |
| "step": 302500 |
| }, |
| { |
| "epoch": 0.48369601222170516, |
| "grad_norm": 0.0002959502162411809, |
| "learning_rate": 2.5884544915984875e-05, |
| "loss": 0.0, |
| "step": 303000 |
| }, |
| { |
| "epoch": 0.4844941904596948, |
| "grad_norm": 0.23617546260356903, |
| "learning_rate": 2.587159074324316e-05, |
| "loss": 0.0, |
| "step": 303500 |
| }, |
| { |
| "epoch": 0.4852923686976844, |
| "grad_norm": 0.0008308735559694469, |
| "learning_rate": 2.5858619467199415e-05, |
| "loss": 0.0, |
| "step": 304000 |
| }, |
| { |
| "epoch": 0.486090546935674, |
| "grad_norm": 0.0004012871941085905, |
| "learning_rate": 2.584563110826019e-05, |
| "loss": 0.0, |
| "step": 304500 |
| }, |
| { |
| "epoch": 0.4868887251736636, |
| "grad_norm": 0.0012458977289497852, |
| "learning_rate": 2.5832625686858918e-05, |
| "loss": 0.0, |
| "step": 305000 |
| }, |
| { |
| "epoch": 0.4876869034116532, |
| "grad_norm": 0.00029142654966562986, |
| "learning_rate": 2.5819603223455854e-05, |
| "loss": 0.0, |
| "step": 305500 |
| }, |
| { |
| "epoch": 0.4884850816496429, |
| "grad_norm": 0.00011621385056059808, |
| "learning_rate": 2.5806563738538086e-05, |
| "loss": 0.0, |
| "step": 306000 |
| }, |
| { |
| "epoch": 0.4892832598876325, |
| "grad_norm": 0.0011832008603960276, |
| "learning_rate": 2.5793507252619474e-05, |
| "loss": 0.0, |
| "step": 306500 |
| }, |
| { |
| "epoch": 0.4900814381256221, |
| "grad_norm": 0.0010382416658103466, |
| "learning_rate": 2.5780433786240605e-05, |
| "loss": 0.0, |
| "step": 307000 |
| }, |
| { |
| "epoch": 0.4908796163636117, |
| "grad_norm": 0.0004220962291583419, |
| "learning_rate": 2.5767343359968803e-05, |
| "loss": 0.0, |
| "step": 307500 |
| }, |
| { |
| "epoch": 0.4916777946016013, |
| "grad_norm": 0.00026486560818739235, |
| "learning_rate": 2.5754235994398073e-05, |
| "loss": 0.0, |
| "step": 308000 |
| }, |
| { |
| "epoch": 0.49247597283959094, |
| "grad_norm": 0.00047953566536307335, |
| "learning_rate": 2.574111171014905e-05, |
| "loss": 0.0, |
| "step": 308500 |
| }, |
| { |
| "epoch": 0.49327415107758055, |
| "grad_norm": 0.0001465219829697162, |
| "learning_rate": 2.5727970527868998e-05, |
| "loss": 0.0, |
| "step": 309000 |
| }, |
| { |
| "epoch": 0.49407232931557016, |
| "grad_norm": 0.0002420053497189656, |
| "learning_rate": 2.5714812468231773e-05, |
| "loss": 0.0, |
| "step": 309500 |
| }, |
| { |
| "epoch": 0.49487050755355977, |
| "grad_norm": 0.00032595338416285813, |
| "learning_rate": 2.5701637551937767e-05, |
| "loss": 0.0, |
| "step": 310000 |
| }, |
| { |
| "epoch": 0.4956686857915494, |
| "grad_norm": 0.00023393328592646867, |
| "learning_rate": 2.5688445799713905e-05, |
| "loss": 0.0, |
| "step": 310500 |
| }, |
| { |
| "epoch": 0.496466864029539, |
| "grad_norm": 0.00023959919053595513, |
| "learning_rate": 2.5675237232313584e-05, |
| "loss": 0.0, |
| "step": 311000 |
| }, |
| { |
| "epoch": 0.4972650422675286, |
| "grad_norm": 0.00025843450566753745, |
| "learning_rate": 2.5662011870516667e-05, |
| "loss": 0.0, |
| "step": 311500 |
| }, |
| { |
| "epoch": 0.4980632205055182, |
| "grad_norm": 0.0003428571508266032, |
| "learning_rate": 2.5648769735129435e-05, |
| "loss": 0.0, |
| "step": 312000 |
| }, |
| { |
| "epoch": 0.4988613987435078, |
| "grad_norm": 0.0012112685944885015, |
| "learning_rate": 2.5635510846984554e-05, |
| "loss": 0.0, |
| "step": 312500 |
| }, |
| { |
| "epoch": 0.49965957698149743, |
| "grad_norm": 0.00033978992723859847, |
| "learning_rate": 2.5622235226941047e-05, |
| "loss": 0.0, |
| "step": 313000 |
| }, |
| { |
| "epoch": 0.500457755219487, |
| "grad_norm": 0.00017820294306147844, |
| "learning_rate": 2.560894289588426e-05, |
| "loss": 0.0, |
| "step": 313500 |
| }, |
| { |
| "epoch": 0.5012559334574767, |
| "grad_norm": 0.00042557052802294493, |
| "learning_rate": 2.5595633874725832e-05, |
| "loss": 0.0, |
| "step": 314000 |
| }, |
| { |
| "epoch": 0.5020541116954663, |
| "grad_norm": 0.00031017063884064555, |
| "learning_rate": 2.5582308184403653e-05, |
| "loss": 0.0, |
| "step": 314500 |
| }, |
| { |
| "epoch": 0.5028522899334559, |
| "grad_norm": 0.00034823661553673446, |
| "learning_rate": 2.5568965845881843e-05, |
| "loss": 0.0, |
| "step": 315000 |
| }, |
| { |
| "epoch": 0.5036504681714455, |
| "grad_norm": 0.00034386530751362443, |
| "learning_rate": 2.5555606880150712e-05, |
| "loss": 0.0, |
| "step": 315500 |
| }, |
| { |
| "epoch": 0.5044486464094351, |
| "grad_norm": 0.0007472603465430439, |
| "learning_rate": 2.554223130822672e-05, |
| "loss": 0.0, |
| "step": 316000 |
| }, |
| { |
| "epoch": 0.5052468246474248, |
| "grad_norm": 4685.83837890625, |
| "learning_rate": 2.5528839151152466e-05, |
| "loss": 0.0, |
| "step": 316500 |
| }, |
| { |
| "epoch": 0.5060450028854143, |
| "grad_norm": 0.0013435595901682973, |
| "learning_rate": 2.5515430429996633e-05, |
| "loss": 0.0, |
| "step": 317000 |
| }, |
| { |
| "epoch": 0.506843181123404, |
| "grad_norm": 0.00042831370956264436, |
| "learning_rate": 2.550200516585396e-05, |
| "loss": 0.0, |
| "step": 317500 |
| }, |
| { |
| "epoch": 0.5076413593613935, |
| "grad_norm": 0.00022195317433215678, |
| "learning_rate": 2.548856337984522e-05, |
| "loss": 0.0, |
| "step": 318000 |
| }, |
| { |
| "epoch": 0.5084395375993832, |
| "grad_norm": 0.0003667280252557248, |
| "learning_rate": 2.5475105093117168e-05, |
| "loss": 0.0, |
| "step": 318500 |
| }, |
| { |
| "epoch": 0.5092377158373728, |
| "grad_norm": 0.0003962449845857918, |
| "learning_rate": 2.546163032684253e-05, |
| "loss": 0.0, |
| "step": 319000 |
| }, |
| { |
| "epoch": 0.5100358940753624, |
| "grad_norm": 0.00021383292914833874, |
| "learning_rate": 2.544813910221994e-05, |
| "loss": 0.0, |
| "step": 319500 |
| }, |
| { |
| "epoch": 0.510834072313352, |
| "grad_norm": 0.00024260817735921592, |
| "learning_rate": 2.5434631440473945e-05, |
| "loss": 0.0, |
| "step": 320000 |
| }, |
| { |
| "epoch": 0.5116322505513416, |
| "grad_norm": 0.00034822686575353146, |
| "learning_rate": 2.5421107362854944e-05, |
| "loss": 0.0, |
| "step": 320500 |
| }, |
| { |
| "epoch": 0.5124304287893312, |
| "grad_norm": 0.8575116991996765, |
| "learning_rate": 2.5407566890639156e-05, |
| "loss": 0.0, |
| "step": 321000 |
| }, |
| { |
| "epoch": 0.5132286070273209, |
| "grad_norm": 0.17129367589950562, |
| "learning_rate": 2.5394010045128596e-05, |
| "loss": 0.0, |
| "step": 321500 |
| }, |
| { |
| "epoch": 0.5140267852653104, |
| "grad_norm": 0.000474643602501601, |
| "learning_rate": 2.5380436847651038e-05, |
| "loss": 0.0, |
| "step": 322000 |
| }, |
| { |
| "epoch": 0.5148249635033001, |
| "grad_norm": 0.00037549270200543106, |
| "learning_rate": 2.5366847319559975e-05, |
| "loss": 0.0, |
| "step": 322500 |
| }, |
| { |
| "epoch": 0.5156231417412896, |
| "grad_norm": 0.0004087206325493753, |
| "learning_rate": 2.5353241482234605e-05, |
| "loss": 0.0, |
| "step": 323000 |
| }, |
| { |
| "epoch": 0.5164213199792793, |
| "grad_norm": 0.11741995066404343, |
| "learning_rate": 2.5339619357079772e-05, |
| "loss": 0.0, |
| "step": 323500 |
| }, |
| { |
| "epoch": 0.5172194982172689, |
| "grad_norm": 0.00035057743662036955, |
| "learning_rate": 2.5325980965525945e-05, |
| "loss": 0.0, |
| "step": 324000 |
| }, |
| { |
| "epoch": 0.5180176764552585, |
| "grad_norm": 14.589872360229492, |
| "learning_rate": 2.5312326329029192e-05, |
| "loss": 0.0, |
| "step": 324500 |
| }, |
| { |
| "epoch": 0.5188158546932481, |
| "grad_norm": 0.00046064663911238313, |
| "learning_rate": 2.5298655469071128e-05, |
| "loss": 0.0, |
| "step": 325000 |
| }, |
| { |
| "epoch": 0.5196140329312378, |
| "grad_norm": 0.0005871613975614309, |
| "learning_rate": 2.5284968407158904e-05, |
| "loss": 0.0, |
| "step": 325500 |
| }, |
| { |
| "epoch": 0.5204122111692273, |
| "grad_norm": 0.00022222854022402316, |
| "learning_rate": 2.5271265164825135e-05, |
| "loss": 0.0, |
| "step": 326000 |
| }, |
| { |
| "epoch": 0.521210389407217, |
| "grad_norm": 0.0003983532660640776, |
| "learning_rate": 2.525754576362792e-05, |
| "loss": 0.0, |
| "step": 326500 |
| }, |
| { |
| "epoch": 0.5220085676452065, |
| "grad_norm": 0.0004353003459982574, |
| "learning_rate": 2.5243810225150764e-05, |
| "loss": 0.0, |
| "step": 327000 |
| }, |
| { |
| "epoch": 0.5228067458831962, |
| "grad_norm": 0.0003073965781368315, |
| "learning_rate": 2.523005857100256e-05, |
| "loss": 0.0, |
| "step": 327500 |
| }, |
| { |
| "epoch": 0.5236049241211858, |
| "grad_norm": 0.0001647748431423679, |
| "learning_rate": 2.5216290822817556e-05, |
| "loss": 0.0, |
| "step": 328000 |
| }, |
| { |
| "epoch": 0.5244031023591754, |
| "grad_norm": 0.00037706273724325, |
| "learning_rate": 2.520250700225532e-05, |
| "loss": 0.0, |
| "step": 328500 |
| }, |
| { |
| "epoch": 0.525201280597165, |
| "grad_norm": 0.00029242149321362376, |
| "learning_rate": 2.5188707131000714e-05, |
| "loss": 0.0, |
| "step": 329000 |
| }, |
| { |
| "epoch": 0.5259994588351546, |
| "grad_norm": 0.00018549045489635319, |
| "learning_rate": 2.5174891230763827e-05, |
| "loss": 0.0, |
| "step": 329500 |
| }, |
| { |
| "epoch": 0.5267976370731443, |
| "grad_norm": 0.00017411461158189923, |
| "learning_rate": 2.516105932327999e-05, |
| "loss": 0.0, |
| "step": 330000 |
| }, |
| { |
| "epoch": 0.5275958153111339, |
| "grad_norm": 0.0004608782473951578, |
| "learning_rate": 2.5147211430309704e-05, |
| "loss": 0.0, |
| "step": 330500 |
| }, |
| { |
| "epoch": 0.5283939935491235, |
| "grad_norm": 0.0007079532369971275, |
| "learning_rate": 2.5133347573638617e-05, |
| "loss": 0.0, |
| "step": 331000 |
| }, |
| { |
| "epoch": 0.5291921717871131, |
| "grad_norm": 0.0004867357783950865, |
| "learning_rate": 2.51194677750775e-05, |
| "loss": 0.0, |
| "step": 331500 |
| }, |
| { |
| "epoch": 0.5299903500251028, |
| "grad_norm": 0.0003103635390289128, |
| "learning_rate": 2.5105572056462206e-05, |
| "loss": 0.0, |
| "step": 332000 |
| }, |
| { |
| "epoch": 0.5307885282630923, |
| "grad_norm": 0.00025766075123101473, |
| "learning_rate": 2.5091660439653613e-05, |
| "loss": 0.0, |
| "step": 332500 |
| }, |
| { |
| "epoch": 0.531586706501082, |
| "grad_norm": 0.0006120207253843546, |
| "learning_rate": 2.5077732946537638e-05, |
| "loss": 0.0, |
| "step": 333000 |
| }, |
| { |
| "epoch": 0.5323848847390715, |
| "grad_norm": 0.0005194434197619557, |
| "learning_rate": 2.5063789599025148e-05, |
| "loss": 0.0, |
| "step": 333500 |
| }, |
| { |
| "epoch": 0.5331830629770612, |
| "grad_norm": 0.00029437083867378533, |
| "learning_rate": 2.5049830419051977e-05, |
| "loss": 0.0, |
| "step": 334000 |
| }, |
| { |
| "epoch": 0.5339812412150508, |
| "grad_norm": 0.00030525890178978443, |
| "learning_rate": 2.503585542857885e-05, |
| "loss": 0.0, |
| "step": 334500 |
| }, |
| { |
| "epoch": 0.5347794194530404, |
| "grad_norm": 0.00021708759595640004, |
| "learning_rate": 2.5021864649591373e-05, |
| "loss": 0.0, |
| "step": 335000 |
| }, |
| { |
| "epoch": 0.53557759769103, |
| "grad_norm": 0.0003332449123263359, |
| "learning_rate": 2.500785810409998e-05, |
| "loss": 0.0, |
| "step": 335500 |
| }, |
| { |
| "epoch": 0.5363757759290196, |
| "grad_norm": 0.0004120411758776754, |
| "learning_rate": 2.4993835814139924e-05, |
| "loss": 0.0, |
| "step": 336000 |
| }, |
| { |
| "epoch": 0.5371739541670092, |
| "grad_norm": 1294.555419921875, |
| "learning_rate": 2.497979780177122e-05, |
| "loss": 0.0, |
| "step": 336500 |
| }, |
| { |
| "epoch": 0.5379721324049989, |
| "grad_norm": 0.00044994373456574976, |
| "learning_rate": 2.496574408907862e-05, |
| "loss": 0.0, |
| "step": 337000 |
| }, |
| { |
| "epoch": 0.5387703106429884, |
| "grad_norm": 0.0004527137498371303, |
| "learning_rate": 2.4951674698171568e-05, |
| "loss": 0.0, |
| "step": 337500 |
| }, |
| { |
| "epoch": 0.5395684888809781, |
| "grad_norm": 0.00039052587817423046, |
| "learning_rate": 2.493758965118419e-05, |
| "loss": 0.0, |
| "step": 338000 |
| }, |
| { |
| "epoch": 0.5403666671189676, |
| "grad_norm": 0.003807082772254944, |
| "learning_rate": 2.4923488970275225e-05, |
| "loss": 0.0, |
| "step": 338500 |
| }, |
| { |
| "epoch": 0.5411648453569573, |
| "grad_norm": 0.000191923973034136, |
| "learning_rate": 2.4909372677628007e-05, |
| "loss": 0.0, |
| "step": 339000 |
| }, |
| { |
| "epoch": 0.5419630235949469, |
| "grad_norm": 0.0007527422276325524, |
| "learning_rate": 2.489524079545044e-05, |
| "loss": 0.0, |
| "step": 339500 |
| }, |
| { |
| "epoch": 0.5427612018329365, |
| "grad_norm": 0.0001783394836820662, |
| "learning_rate": 2.488109334597496e-05, |
| "loss": 0.0, |
| "step": 340000 |
| }, |
| { |
| "epoch": 0.5435593800709261, |
| "grad_norm": 0.0002899257524404675, |
| "learning_rate": 2.4866930351458482e-05, |
| "loss": 0.0, |
| "step": 340500 |
| }, |
| { |
| "epoch": 0.5443575583089157, |
| "grad_norm": 0.00021108388318680227, |
| "learning_rate": 2.4852751834182376e-05, |
| "loss": 0.0, |
| "step": 341000 |
| }, |
| { |
| "epoch": 0.5451557365469053, |
| "grad_norm": 0.0003050707746297121, |
| "learning_rate": 2.4838557816452438e-05, |
| "loss": 0.0, |
| "step": 341500 |
| }, |
| { |
| "epoch": 0.545953914784895, |
| "grad_norm": 0.0005089346086606383, |
| "learning_rate": 2.482434832059885e-05, |
| "loss": 0.0, |
| "step": 342000 |
| }, |
| { |
| "epoch": 0.5467520930228845, |
| "grad_norm": 0.0003350640181452036, |
| "learning_rate": 2.481012336897613e-05, |
| "loss": 0.0, |
| "step": 342500 |
| }, |
| { |
| "epoch": 0.5475502712608742, |
| "grad_norm": 0.00018076538981404155, |
| "learning_rate": 2.4795882983963133e-05, |
| "loss": 0.0, |
| "step": 343000 |
| }, |
| { |
| "epoch": 0.5483484494988637, |
| "grad_norm": 0.00013365145423449576, |
| "learning_rate": 2.4781627187962988e-05, |
| "loss": 0.0, |
| "step": 343500 |
| }, |
| { |
| "epoch": 0.5491466277368534, |
| "grad_norm": 0.00038794297142885625, |
| "learning_rate": 2.4767356003403056e-05, |
| "loss": 0.0, |
| "step": 344000 |
| }, |
| { |
| "epoch": 0.549944805974843, |
| "grad_norm": 0.0001909395505208522, |
| "learning_rate": 2.4753069452734923e-05, |
| "loss": 0.0, |
| "step": 344500 |
| }, |
| { |
| "epoch": 0.5507429842128326, |
| "grad_norm": 0.0001607197045814246, |
| "learning_rate": 2.4738767558434332e-05, |
| "loss": 0.0, |
| "step": 345000 |
| }, |
| { |
| "epoch": 0.5515411624508223, |
| "grad_norm": 0.00026962198899127543, |
| "learning_rate": 2.4724450343001184e-05, |
| "loss": 0.0, |
| "step": 345500 |
| }, |
| { |
| "epoch": 0.5523393406888119, |
| "grad_norm": 0.00013342987222131342, |
| "learning_rate": 2.4710117828959472e-05, |
| "loss": 0.0, |
| "step": 346000 |
| }, |
| { |
| "epoch": 0.5531375189268015, |
| "grad_norm": 0.00031549722189083695, |
| "learning_rate": 2.469577003885726e-05, |
| "loss": 0.0, |
| "step": 346500 |
| }, |
| { |
| "epoch": 0.5539356971647911, |
| "grad_norm": 2732.885498046875, |
| "learning_rate": 2.468140699526664e-05, |
| "loss": 0.0, |
| "step": 347000 |
| }, |
| { |
| "epoch": 0.5547338754027807, |
| "grad_norm": 0.03750293329358101, |
| "learning_rate": 2.4667028720783712e-05, |
| "loss": 0.0, |
| "step": 347500 |
| }, |
| { |
| "epoch": 0.5555320536407703, |
| "grad_norm": 0.0010069627314805984, |
| "learning_rate": 2.465263523802853e-05, |
| "loss": 0.0, |
| "step": 348000 |
| }, |
| { |
| "epoch": 0.55633023187876, |
| "grad_norm": 0.00019862744375132024, |
| "learning_rate": 2.463822656964506e-05, |
| "loss": 0.0, |
| "step": 348500 |
| }, |
| { |
| "epoch": 0.5571284101167495, |
| "grad_norm": 0.0008133440860547125, |
| "learning_rate": 2.4623802738301183e-05, |
| "loss": 0.0, |
| "step": 349000 |
| }, |
| { |
| "epoch": 0.5579265883547392, |
| "grad_norm": 0.00027964115724898875, |
| "learning_rate": 2.4609363766688627e-05, |
| "loss": 0.0, |
| "step": 349500 |
| }, |
| { |
| "epoch": 0.5587247665927287, |
| "grad_norm": 0.0001666530006332323, |
| "learning_rate": 2.4594909677522934e-05, |
| "loss": 0.0, |
| "step": 350000 |
| }, |
| { |
| "epoch": 0.5595229448307184, |
| "grad_norm": 0.00025385370827279985, |
| "learning_rate": 2.458044049354342e-05, |
| "loss": 0.0, |
| "step": 350500 |
| }, |
| { |
| "epoch": 0.560321123068708, |
| "grad_norm": 0.00013368998770602047, |
| "learning_rate": 2.4565956237513173e-05, |
| "loss": 0.0, |
| "step": 351000 |
| }, |
| { |
| "epoch": 0.5611193013066976, |
| "grad_norm": 0.4133665859699249, |
| "learning_rate": 2.4551456932218966e-05, |
| "loss": 0.0, |
| "step": 351500 |
| }, |
| { |
| "epoch": 0.5619174795446872, |
| "grad_norm": 0.14180496335029602, |
| "learning_rate": 2.453694260047127e-05, |
| "loss": 0.0, |
| "step": 352000 |
| }, |
| { |
| "epoch": 0.5627156577826768, |
| "grad_norm": 0.00047657452523708344, |
| "learning_rate": 2.4522413265104182e-05, |
| "loss": 0.0, |
| "step": 352500 |
| }, |
| { |
| "epoch": 0.5635138360206664, |
| "grad_norm": 0.0002391609741607681, |
| "learning_rate": 2.4507868948975404e-05, |
| "loss": 0.0, |
| "step": 353000 |
| }, |
| { |
| "epoch": 0.5643120142586561, |
| "grad_norm": 0.0003856563416775316, |
| "learning_rate": 2.449330967496621e-05, |
| "loss": 0.0, |
| "step": 353500 |
| }, |
| { |
| "epoch": 0.5651101924966456, |
| "grad_norm": 0.004248655401170254, |
| "learning_rate": 2.4478735465981412e-05, |
| "loss": 0.0, |
| "step": 354000 |
| }, |
| { |
| "epoch": 0.5659083707346353, |
| "grad_norm": 0.028849566355347633, |
| "learning_rate": 2.4464146344949303e-05, |
| "loss": 0.0, |
| "step": 354500 |
| }, |
| { |
| "epoch": 0.5667065489726248, |
| "grad_norm": 0.0003297879302408546, |
| "learning_rate": 2.444954233482164e-05, |
| "loss": 0.0, |
| "step": 355000 |
| }, |
| { |
| "epoch": 0.5675047272106145, |
| "grad_norm": 0.0020440176595002413, |
| "learning_rate": 2.4434923458573617e-05, |
| "loss": 0.0, |
| "step": 355500 |
| }, |
| { |
| "epoch": 0.5683029054486041, |
| "grad_norm": 0.0003888048813678324, |
| "learning_rate": 2.442028973920379e-05, |
| "loss": 0.0, |
| "step": 356000 |
| }, |
| { |
| "epoch": 0.5691010836865937, |
| "grad_norm": 0.0005131899379193783, |
| "learning_rate": 2.44056411997341e-05, |
| "loss": 0.0, |
| "step": 356500 |
| }, |
| { |
| "epoch": 0.5698992619245833, |
| "grad_norm": 0.0008440042147412896, |
| "learning_rate": 2.4390977863209777e-05, |
| "loss": 0.0, |
| "step": 357000 |
| }, |
| { |
| "epoch": 0.570697440162573, |
| "grad_norm": 0.0008664605556987226, |
| "learning_rate": 2.437629975269933e-05, |
| "loss": 0.0, |
| "step": 357500 |
| }, |
| { |
| "epoch": 0.5714956184005625, |
| "grad_norm": 0.0005484423600137234, |
| "learning_rate": 2.4361606891294532e-05, |
| "loss": 0.0, |
| "step": 358000 |
| }, |
| { |
| "epoch": 0.5722937966385522, |
| "grad_norm": 0.0016136858612298965, |
| "learning_rate": 2.4346899302110336e-05, |
| "loss": 0.0, |
| "step": 358500 |
| }, |
| { |
| "epoch": 0.5730919748765417, |
| "grad_norm": 0.0021964486222714186, |
| "learning_rate": 2.4332177008284888e-05, |
| "loss": 0.0, |
| "step": 359000 |
| }, |
| { |
| "epoch": 0.5738901531145314, |
| "grad_norm": 0.0003874083631671965, |
| "learning_rate": 2.4317440032979446e-05, |
| "loss": 0.0, |
| "step": 359500 |
| }, |
| { |
| "epoch": 0.574688331352521, |
| "grad_norm": 0.0002162757737096399, |
| "learning_rate": 2.430268839937839e-05, |
| "loss": 0.0, |
| "step": 360000 |
| }, |
| { |
| "epoch": 0.5754865095905106, |
| "grad_norm": 0.0002551154757384211, |
| "learning_rate": 2.428792213068914e-05, |
| "loss": 0.0, |
| "step": 360500 |
| }, |
| { |
| "epoch": 0.5762846878285003, |
| "grad_norm": 0.00028797570848837495, |
| "learning_rate": 2.427314125014214e-05, |
| "loss": 0.0, |
| "step": 361000 |
| }, |
| { |
| "epoch": 0.5770828660664898, |
| "grad_norm": 0.0003449781215749681, |
| "learning_rate": 2.4258345780990833e-05, |
| "loss": 0.0, |
| "step": 361500 |
| }, |
| { |
| "epoch": 0.5778810443044795, |
| "grad_norm": 0.0002227453514933586, |
| "learning_rate": 2.4243535746511615e-05, |
| "loss": 0.0, |
| "step": 362000 |
| }, |
| { |
| "epoch": 0.5786792225424691, |
| "grad_norm": 0.0005646930076181889, |
| "learning_rate": 2.4228711170003782e-05, |
| "loss": 0.0, |
| "step": 362500 |
| }, |
| { |
| "epoch": 0.5794774007804587, |
| "grad_norm": 0.00017032682080753148, |
| "learning_rate": 2.4213872074789518e-05, |
| "loss": 0.0, |
| "step": 363000 |
| }, |
| { |
| "epoch": 0.5802755790184483, |
| "grad_norm": 0.00020792830036953092, |
| "learning_rate": 2.4199018484213844e-05, |
| "loss": 0.0, |
| "step": 363500 |
| }, |
| { |
| "epoch": 0.581073757256438, |
| "grad_norm": 0.00023471614986192435, |
| "learning_rate": 2.4184150421644586e-05, |
| "loss": 0.0, |
| "step": 364000 |
| }, |
| { |
| "epoch": 0.5818719354944275, |
| "grad_norm": 0.0009064020705409348, |
| "learning_rate": 2.4169267910472336e-05, |
| "loss": 0.0, |
| "step": 364500 |
| }, |
| { |
| "epoch": 0.5826701137324172, |
| "grad_norm": 0.00018221262143924832, |
| "learning_rate": 2.4154370974110425e-05, |
| "loss": 0.0, |
| "step": 365000 |
| }, |
| { |
| "epoch": 0.5834682919704067, |
| "grad_norm": 0.2526322305202484, |
| "learning_rate": 2.4139459635994864e-05, |
| "loss": 0.0, |
| "step": 365500 |
| }, |
| { |
| "epoch": 0.5842664702083964, |
| "grad_norm": 64.04448699951172, |
| "learning_rate": 2.412453391958434e-05, |
| "loss": 0.0, |
| "step": 366000 |
| }, |
| { |
| "epoch": 0.585064648446386, |
| "grad_norm": 0.0006586963427253067, |
| "learning_rate": 2.4109593848360137e-05, |
| "loss": 0.0, |
| "step": 366500 |
| }, |
| { |
| "epoch": 0.5858628266843756, |
| "grad_norm": 0.003046433674171567, |
| "learning_rate": 2.4094639445826134e-05, |
| "loss": 0.0, |
| "step": 367000 |
| }, |
| { |
| "epoch": 0.5866610049223652, |
| "grad_norm": 0.0009702751412987709, |
| "learning_rate": 2.4079670735508765e-05, |
| "loss": 0.0, |
| "step": 367500 |
| }, |
| { |
| "epoch": 0.5874591831603548, |
| "grad_norm": 0.0007255422533489764, |
| "learning_rate": 2.4064687740956956e-05, |
| "loss": 0.0, |
| "step": 368000 |
| }, |
| { |
| "epoch": 0.5882573613983444, |
| "grad_norm": 0.000522131216712296, |
| "learning_rate": 2.4049690485742116e-05, |
| "loss": 0.0, |
| "step": 368500 |
| }, |
| { |
| "epoch": 0.5890555396363341, |
| "grad_norm": 0.0017868474824354053, |
| "learning_rate": 2.4034678993458088e-05, |
| "loss": 0.0, |
| "step": 369000 |
| }, |
| { |
| "epoch": 0.5898537178743236, |
| "grad_norm": 0.002390818204730749, |
| "learning_rate": 2.4019653287721105e-05, |
| "loss": 0.0, |
| "step": 369500 |
| }, |
| { |
| "epoch": 0.5906518961123133, |
| "grad_norm": 0.0002911436022259295, |
| "learning_rate": 2.400461339216978e-05, |
| "loss": 0.0, |
| "step": 370000 |
| }, |
| { |
| "epoch": 0.5914500743503028, |
| "grad_norm": 0.0005996805848553777, |
| "learning_rate": 2.3989559330465018e-05, |
| "loss": 0.0, |
| "step": 370500 |
| }, |
| { |
| "epoch": 0.5922482525882925, |
| "grad_norm": 0.0002952713402919471, |
| "learning_rate": 2.3974491126290042e-05, |
| "loss": 0.0, |
| "step": 371000 |
| }, |
| { |
| "epoch": 0.5930464308262821, |
| "grad_norm": 0.0002812375605572015, |
| "learning_rate": 2.3959408803350304e-05, |
| "loss": 0.0, |
| "step": 371500 |
| }, |
| { |
| "epoch": 0.5938446090642717, |
| "grad_norm": 0.0002432354522170499, |
| "learning_rate": 2.3944312385373475e-05, |
| "loss": 0.0, |
| "step": 372000 |
| }, |
| { |
| "epoch": 0.5946427873022613, |
| "grad_norm": 0.002821897389367223, |
| "learning_rate": 2.392920189610941e-05, |
| "loss": 0.0, |
| "step": 372500 |
| }, |
| { |
| "epoch": 0.5954409655402509, |
| "grad_norm": 0.003000877797603607, |
| "learning_rate": 2.3914077359330088e-05, |
| "loss": 0.0, |
| "step": 373000 |
| }, |
| { |
| "epoch": 0.5962391437782405, |
| "grad_norm": 0.000387115083867684, |
| "learning_rate": 2.3898938798829576e-05, |
| "loss": 0.0, |
| "step": 373500 |
| }, |
| { |
| "epoch": 0.5970373220162302, |
| "grad_norm": 0.0008274815627373755, |
| "learning_rate": 2.3883786238424035e-05, |
| "loss": 0.0, |
| "step": 374000 |
| }, |
| { |
| "epoch": 0.5978355002542197, |
| "grad_norm": 0.00043856215779669583, |
| "learning_rate": 2.3868619701951625e-05, |
| "loss": 0.0, |
| "step": 374500 |
| }, |
| { |
| "epoch": 0.5986336784922094, |
| "grad_norm": 0.000386549363611266, |
| "learning_rate": 2.3853439213272506e-05, |
| "loss": 0.0, |
| "step": 375000 |
| }, |
| { |
| "epoch": 0.5994318567301989, |
| "grad_norm": 0.0003421735018491745, |
| "learning_rate": 2.383824479626878e-05, |
| "loss": 0.0, |
| "step": 375500 |
| }, |
| { |
| "epoch": 0.6002300349681886, |
| "grad_norm": 0.00029982542037032545, |
| "learning_rate": 2.382303647484448e-05, |
| "loss": 0.0, |
| "step": 376000 |
| }, |
| { |
| "epoch": 0.6010282132061782, |
| "grad_norm": 0.0012008086778223515, |
| "learning_rate": 2.3807814272925475e-05, |
| "loss": 0.0, |
| "step": 376500 |
| }, |
| { |
| "epoch": 0.6018263914441678, |
| "grad_norm": 84.65890502929688, |
| "learning_rate": 2.3792578214459513e-05, |
| "loss": 0.0, |
| "step": 377000 |
| }, |
| { |
| "epoch": 0.6026245696821575, |
| "grad_norm": 0.0005028890445828438, |
| "learning_rate": 2.3777328323416116e-05, |
| "loss": 0.0, |
| "step": 377500 |
| }, |
| { |
| "epoch": 0.603422747920147, |
| "grad_norm": 0.00019518673070706427, |
| "learning_rate": 2.3762064623786578e-05, |
| "loss": 0.0, |
| "step": 378000 |
| }, |
| { |
| "epoch": 0.6042209261581367, |
| "grad_norm": 0.0307555440813303, |
| "learning_rate": 2.3746787139583903e-05, |
| "loss": 0.0, |
| "step": 378500 |
| }, |
| { |
| "epoch": 0.6050191043961263, |
| "grad_norm": 410.12652587890625, |
| "learning_rate": 2.3731495894842808e-05, |
| "loss": 0.0, |
| "step": 379000 |
| }, |
| { |
| "epoch": 0.6058172826341159, |
| "grad_norm": 0.02829531952738762, |
| "learning_rate": 2.371619091361963e-05, |
| "loss": 0.0, |
| "step": 379500 |
| }, |
| { |
| "epoch": 0.6066154608721055, |
| "grad_norm": 0.0006308479933068156, |
| "learning_rate": 2.370087221999233e-05, |
| "loss": 0.0, |
| "step": 380000 |
| }, |
| { |
| "epoch": 0.6074136391100952, |
| "grad_norm": 0.0002925604931078851, |
| "learning_rate": 2.3685539838060445e-05, |
| "loss": 0.0, |
| "step": 380500 |
| }, |
| { |
| "epoch": 0.6082118173480847, |
| "grad_norm": 0.0003732262703124434, |
| "learning_rate": 2.3670193791945028e-05, |
| "loss": 0.0, |
| "step": 381000 |
| }, |
| { |
| "epoch": 0.6090099955860744, |
| "grad_norm": 0.0002686434891074896, |
| "learning_rate": 2.3654834105788658e-05, |
| "loss": 0.0, |
| "step": 381500 |
| }, |
| { |
| "epoch": 0.6098081738240639, |
| "grad_norm": 0.0002781795628834516, |
| "learning_rate": 2.363946080375534e-05, |
| "loss": 0.0, |
| "step": 382000 |
| }, |
| { |
| "epoch": 0.6106063520620536, |
| "grad_norm": 0.003745084395632148, |
| "learning_rate": 2.3624073910030537e-05, |
| "loss": 0.0, |
| "step": 382500 |
| }, |
| { |
| "epoch": 0.6114045303000432, |
| "grad_norm": 0.00020644953474402428, |
| "learning_rate": 2.3608673448821054e-05, |
| "loss": 0.0, |
| "step": 383000 |
| }, |
| { |
| "epoch": 0.6122027085380328, |
| "grad_norm": 0.0007499917992390692, |
| "learning_rate": 2.359325944435507e-05, |
| "loss": 0.0, |
| "step": 383500 |
| }, |
| { |
| "epoch": 0.6130008867760224, |
| "grad_norm": 0.0001678672997513786, |
| "learning_rate": 2.3577831920882058e-05, |
| "loss": 0.0, |
| "step": 384000 |
| }, |
| { |
| "epoch": 0.613799065014012, |
| "grad_norm": 0.002954375697299838, |
| "learning_rate": 2.3562390902672762e-05, |
| "loss": 0.0, |
| "step": 384500 |
| }, |
| { |
| "epoch": 0.6145972432520016, |
| "grad_norm": 0.00037960358895361423, |
| "learning_rate": 2.3546936414019152e-05, |
| "loss": 0.0, |
| "step": 385000 |
| }, |
| { |
| "epoch": 0.6153954214899913, |
| "grad_norm": 0.004529908765107393, |
| "learning_rate": 2.35314684792344e-05, |
| "loss": 0.0, |
| "step": 385500 |
| }, |
| { |
| "epoch": 0.6161935997279808, |
| "grad_norm": 0.0032500068191438913, |
| "learning_rate": 2.3515987122652828e-05, |
| "loss": 0.0, |
| "step": 386000 |
| }, |
| { |
| "epoch": 0.6169917779659705, |
| "grad_norm": 0.0002675579162314534, |
| "learning_rate": 2.3500492368629858e-05, |
| "loss": 0.0, |
| "step": 386500 |
| }, |
| { |
| "epoch": 0.61778995620396, |
| "grad_norm": 0.013961972668766975, |
| "learning_rate": 2.348498424154201e-05, |
| "loss": 0.0, |
| "step": 387000 |
| }, |
| { |
| "epoch": 0.6185881344419497, |
| "grad_norm": 0.0020743459463119507, |
| "learning_rate": 2.3469462765786833e-05, |
| "loss": 0.0, |
| "step": 387500 |
| }, |
| { |
| "epoch": 0.6193863126799393, |
| "grad_norm": 0.0005027143633924425, |
| "learning_rate": 2.345392796578288e-05, |
| "loss": 0.0, |
| "step": 388000 |
| }, |
| { |
| "epoch": 0.6201844909179289, |
| "grad_norm": 0.0003365549782756716, |
| "learning_rate": 2.343837986596966e-05, |
| "loss": 0.0, |
| "step": 388500 |
| }, |
| { |
| "epoch": 0.6209826691559185, |
| "grad_norm": 0.0002937042445410043, |
| "learning_rate": 2.3422818490807615e-05, |
| "loss": 0.0, |
| "step": 389000 |
| }, |
| { |
| "epoch": 0.6217808473939082, |
| "grad_norm": 0.000486930541228503, |
| "learning_rate": 2.3407243864778053e-05, |
| "loss": 0.0, |
| "step": 389500 |
| }, |
| { |
| "epoch": 0.6225790256318977, |
| "grad_norm": 0.00019580399384722114, |
| "learning_rate": 2.3391656012383152e-05, |
| "loss": 0.0, |
| "step": 390000 |
| }, |
| { |
| "epoch": 0.6233772038698874, |
| "grad_norm": 0.3469190001487732, |
| "learning_rate": 2.3376054958145884e-05, |
| "loss": 0.0, |
| "step": 390500 |
| }, |
| { |
| "epoch": 0.6241753821078769, |
| "grad_norm": 256.0212707519531, |
| "learning_rate": 2.3360440726609992e-05, |
| "loss": 0.0, |
| "step": 391000 |
| }, |
| { |
| "epoch": 0.6249735603458666, |
| "grad_norm": 0.0003373539075255394, |
| "learning_rate": 2.3344813342339952e-05, |
| "loss": 0.0, |
| "step": 391500 |
| }, |
| { |
| "epoch": 0.6257717385838562, |
| "grad_norm": 0.00019939610501751304, |
| "learning_rate": 2.332917282992093e-05, |
| "loss": 0.0, |
| "step": 392000 |
| }, |
| { |
| "epoch": 0.6265699168218458, |
| "grad_norm": 0.00017603930609766394, |
| "learning_rate": 2.3313519213958745e-05, |
| "loss": 0.0, |
| "step": 392500 |
| }, |
| { |
| "epoch": 0.6273680950598355, |
| "grad_norm": 0.00020907670841552317, |
| "learning_rate": 2.3297852519079837e-05, |
| "loss": 0.0, |
| "step": 393000 |
| }, |
| { |
| "epoch": 0.628166273297825, |
| "grad_norm": 0.0003900097217410803, |
| "learning_rate": 2.3282172769931213e-05, |
| "loss": 0.0, |
| "step": 393500 |
| }, |
| { |
| "epoch": 0.6289644515358147, |
| "grad_norm": 3.75453782081604, |
| "learning_rate": 2.326647999118042e-05, |
| "loss": 0.0, |
| "step": 394000 |
| }, |
| { |
| "epoch": 0.6297626297738043, |
| "grad_norm": 0.013970088213682175, |
| "learning_rate": 2.325077420751551e-05, |
| "loss": 0.0, |
| "step": 394500 |
| }, |
| { |
| "epoch": 0.6305608080117939, |
| "grad_norm": 0.00030075875110924244, |
| "learning_rate": 2.323505544364498e-05, |
| "loss": 0.0, |
| "step": 395000 |
| }, |
| { |
| "epoch": 0.6313589862497835, |
| "grad_norm": 0.0005138195701874793, |
| "learning_rate": 2.321932372429776e-05, |
| "loss": 0.0, |
| "step": 395500 |
| }, |
| { |
| "epoch": 0.6321571644877731, |
| "grad_norm": 178.2792510986328, |
| "learning_rate": 2.3203579074223158e-05, |
| "loss": 0.0001, |
| "step": 396000 |
| }, |
| { |
| "epoch": 0.6329553427257627, |
| "grad_norm": 0.0002129318891093135, |
| "learning_rate": 2.3187821518190826e-05, |
| "loss": 0.0, |
| "step": 396500 |
| }, |
| { |
| "epoch": 0.6337535209637524, |
| "grad_norm": 0.08376732468605042, |
| "learning_rate": 2.317205108099072e-05, |
| "loss": 0.0, |
| "step": 397000 |
| }, |
| { |
| "epoch": 0.6345516992017419, |
| "grad_norm": 0.0006943497573956847, |
| "learning_rate": 2.3156267787433056e-05, |
| "loss": 0.0, |
| "step": 397500 |
| }, |
| { |
| "epoch": 0.6353498774397316, |
| "grad_norm": 0.0002545543829910457, |
| "learning_rate": 2.3140471662348283e-05, |
| "loss": 0.0, |
| "step": 398000 |
| }, |
| { |
| "epoch": 0.6361480556777211, |
| "grad_norm": 0.0022383469622582197, |
| "learning_rate": 2.3124662730587027e-05, |
| "loss": 0.0, |
| "step": 398500 |
| }, |
| { |
| "epoch": 0.6369462339157108, |
| "grad_norm": 0.00022144192189443856, |
| "learning_rate": 2.310884101702007e-05, |
| "loss": 0.0, |
| "step": 399000 |
| }, |
| { |
| "epoch": 0.6377444121537004, |
| "grad_norm": 0.01328088715672493, |
| "learning_rate": 2.30930065465383e-05, |
| "loss": 0.0, |
| "step": 399500 |
| }, |
| { |
| "epoch": 0.63854259039169, |
| "grad_norm": 0.00027457988471724093, |
| "learning_rate": 2.3077159344052675e-05, |
| "loss": 0.0, |
| "step": 400000 |
| }, |
| { |
| "epoch": 0.6393407686296796, |
| "grad_norm": 0.00017897885118145496, |
| "learning_rate": 2.306129943449418e-05, |
| "loss": 0.0, |
| "step": 400500 |
| }, |
| { |
| "epoch": 0.6401389468676693, |
| "grad_norm": 0.00017138413386419415, |
| "learning_rate": 2.3045426842813797e-05, |
| "loss": 0.0, |
| "step": 401000 |
| }, |
| { |
| "epoch": 0.6409371251056588, |
| "grad_norm": 0.00025630032178014517, |
| "learning_rate": 2.3029541593982453e-05, |
| "loss": 0.0, |
| "step": 401500 |
| }, |
| { |
| "epoch": 0.6417353033436485, |
| "grad_norm": 0.0002913358330260962, |
| "learning_rate": 2.3013643712990987e-05, |
| "loss": 0.0, |
| "step": 402000 |
| }, |
| { |
| "epoch": 0.642533481581638, |
| "grad_norm": 0.00017182863666675985, |
| "learning_rate": 2.2997733224850126e-05, |
| "loss": 0.0, |
| "step": 402500 |
| }, |
| { |
| "epoch": 0.6433316598196277, |
| "grad_norm": 0.0005597301642410457, |
| "learning_rate": 2.2981810154590402e-05, |
| "loss": 0.0, |
| "step": 403000 |
| }, |
| { |
| "epoch": 0.6441298380576173, |
| "grad_norm": 0.0002503639261703938, |
| "learning_rate": 2.2965874527262172e-05, |
| "loss": 0.0, |
| "step": 403500 |
| }, |
| { |
| "epoch": 0.6449280162956069, |
| "grad_norm": 0.0002138703566743061, |
| "learning_rate": 2.2949926367935527e-05, |
| "loss": 0.0, |
| "step": 404000 |
| }, |
| { |
| "epoch": 0.6457261945335965, |
| "grad_norm": 0.0001715560065349564, |
| "learning_rate": 2.2933965701700286e-05, |
| "loss": 0.0, |
| "step": 404500 |
| }, |
| { |
| "epoch": 0.6465243727715861, |
| "grad_norm": 0.00013465856318362057, |
| "learning_rate": 2.2917992553665937e-05, |
| "loss": 0.0, |
| "step": 405000 |
| }, |
| { |
| "epoch": 0.6473225510095757, |
| "grad_norm": 0.0001602515549166128, |
| "learning_rate": 2.2902006948961597e-05, |
| "loss": 0.0, |
| "step": 405500 |
| }, |
| { |
| "epoch": 0.6481207292475654, |
| "grad_norm": 0.00034399403375573456, |
| "learning_rate": 2.2886008912736e-05, |
| "loss": 0.0, |
| "step": 406000 |
| }, |
| { |
| "epoch": 0.6489189074855549, |
| "grad_norm": 0.00011978346446994692, |
| "learning_rate": 2.286999847015743e-05, |
| "loss": 0.0, |
| "step": 406500 |
| }, |
| { |
| "epoch": 0.6497170857235446, |
| "grad_norm": 0.00019513712322805077, |
| "learning_rate": 2.2853975646413668e-05, |
| "loss": 0.0, |
| "step": 407000 |
| }, |
| { |
| "epoch": 0.6505152639615341, |
| "grad_norm": 0.001562693272717297, |
| "learning_rate": 2.2837940466712003e-05, |
| "loss": 0.0, |
| "step": 407500 |
| }, |
| { |
| "epoch": 0.6513134421995238, |
| "grad_norm": 0.0001763744803611189, |
| "learning_rate": 2.2821892956279154e-05, |
| "loss": 0.0, |
| "step": 408000 |
| }, |
| { |
| "epoch": 0.6521116204375135, |
| "grad_norm": 0.00090842938516289, |
| "learning_rate": 2.2805833140361228e-05, |
| "loss": 0.0, |
| "step": 408500 |
| }, |
| { |
| "epoch": 0.652909798675503, |
| "grad_norm": 0.003621726995334029, |
| "learning_rate": 2.2789761044223695e-05, |
| "loss": 0.0, |
| "step": 409000 |
| }, |
| { |
| "epoch": 0.6537079769134927, |
| "grad_norm": 0.00015422285650856793, |
| "learning_rate": 2.2773676693151353e-05, |
| "loss": 0.0, |
| "step": 409500 |
| }, |
| { |
| "epoch": 0.6545061551514822, |
| "grad_norm": 0.0012531452812254429, |
| "learning_rate": 2.275758011244827e-05, |
| "loss": 0.0, |
| "step": 410000 |
| }, |
| { |
| "epoch": 0.6553043333894719, |
| "grad_norm": 0.0006565919611603022, |
| "learning_rate": 2.2741471327437767e-05, |
| "loss": 0.0, |
| "step": 410500 |
| }, |
| { |
| "epoch": 0.6561025116274615, |
| "grad_norm": 0.00330674322322011, |
| "learning_rate": 2.2725350363462343e-05, |
| "loss": 0.0, |
| "step": 411000 |
| }, |
| { |
| "epoch": 0.6569006898654511, |
| "grad_norm": 0.0005322208162397146, |
| "learning_rate": 2.270921724588368e-05, |
| "loss": 0.0, |
| "step": 411500 |
| }, |
| { |
| "epoch": 0.6576988681034407, |
| "grad_norm": 0.00018437649123370647, |
| "learning_rate": 2.269307200008256e-05, |
| "loss": 0.0, |
| "step": 412000 |
| }, |
| { |
| "epoch": 0.6584970463414304, |
| "grad_norm": 0.00015154003631323576, |
| "learning_rate": 2.267691465145886e-05, |
| "loss": 0.0, |
| "step": 412500 |
| }, |
| { |
| "epoch": 0.6592952245794199, |
| "grad_norm": 0.00023861938097979873, |
| "learning_rate": 2.2660745225431494e-05, |
| "loss": 0.0, |
| "step": 413000 |
| }, |
| { |
| "epoch": 0.6600934028174096, |
| "grad_norm": 0.0001982437534024939, |
| "learning_rate": 2.2644563747438375e-05, |
| "loss": 0.0, |
| "step": 413500 |
| }, |
| { |
| "epoch": 0.6608915810553991, |
| "grad_norm": 0.00024549709632992744, |
| "learning_rate": 2.2628370242936377e-05, |
| "loss": 0.0, |
| "step": 414000 |
| }, |
| { |
| "epoch": 0.6616897592933888, |
| "grad_norm": 0.000128219326143153, |
| "learning_rate": 2.2612164737401288e-05, |
| "loss": 0.0, |
| "step": 414500 |
| }, |
| { |
| "epoch": 0.6624879375313784, |
| "grad_norm": 0.00017211749218404293, |
| "learning_rate": 2.2595947256327786e-05, |
| "loss": 0.0, |
| "step": 415000 |
| }, |
| { |
| "epoch": 0.663286115769368, |
| "grad_norm": 0.0005725378287024796, |
| "learning_rate": 2.2579717825229384e-05, |
| "loss": 0.0, |
| "step": 415500 |
| }, |
| { |
| "epoch": 0.6640842940073576, |
| "grad_norm": 0.002562319627031684, |
| "learning_rate": 2.256347646963839e-05, |
| "loss": 0.0, |
| "step": 416000 |
| }, |
| { |
| "epoch": 0.6648824722453472, |
| "grad_norm": 0.0054826377891004086, |
| "learning_rate": 2.254722321510588e-05, |
| "loss": 0.0, |
| "step": 416500 |
| }, |
| { |
| "epoch": 0.6656806504833368, |
| "grad_norm": 0.04357144236564636, |
| "learning_rate": 2.2530958087201656e-05, |
| "loss": 0.0, |
| "step": 417000 |
| }, |
| { |
| "epoch": 0.6664788287213265, |
| "grad_norm": 0.002114373492076993, |
| "learning_rate": 2.251468111151418e-05, |
| "loss": 0.0, |
| "step": 417500 |
| }, |
| { |
| "epoch": 0.667277006959316, |
| "grad_norm": 0.0005999524146318436, |
| "learning_rate": 2.249839231365056e-05, |
| "loss": 0.0, |
| "step": 418000 |
| }, |
| { |
| "epoch": 0.6680751851973057, |
| "grad_norm": 0.0004485425597522408, |
| "learning_rate": 2.2482091719236514e-05, |
| "loss": 0.0, |
| "step": 418500 |
| }, |
| { |
| "epoch": 0.6688733634352952, |
| "grad_norm": 0.00020038214279338717, |
| "learning_rate": 2.2465779353916305e-05, |
| "loss": 0.0, |
| "step": 419000 |
| }, |
| { |
| "epoch": 0.6696715416732849, |
| "grad_norm": 0.00029285537311807275, |
| "learning_rate": 2.2449455243352724e-05, |
| "loss": 0.0, |
| "step": 419500 |
| }, |
| { |
| "epoch": 0.6704697199112745, |
| "grad_norm": 0.0003520438331179321, |
| "learning_rate": 2.243311941322703e-05, |
| "loss": 0.0, |
| "step": 420000 |
| }, |
| { |
| "epoch": 0.6712678981492641, |
| "grad_norm": 0.0002213429397670552, |
| "learning_rate": 2.2416771889238928e-05, |
| "loss": 0.0, |
| "step": 420500 |
| }, |
| { |
| "epoch": 0.6720660763872537, |
| "grad_norm": 0.00015232243458740413, |
| "learning_rate": 2.240041269710652e-05, |
| "loss": 0.0, |
| "step": 421000 |
| }, |
| { |
| "epoch": 0.6728642546252434, |
| "grad_norm": 0.0004849177203141153, |
| "learning_rate": 2.2384041862566254e-05, |
| "loss": 0.0, |
| "step": 421500 |
| }, |
| { |
| "epoch": 0.6736624328632329, |
| "grad_norm": 0.00021231337450444698, |
| "learning_rate": 2.23676594113729e-05, |
| "loss": 0.0, |
| "step": 422000 |
| }, |
| { |
| "epoch": 0.6744606111012226, |
| "grad_norm": 0.0001643193099880591, |
| "learning_rate": 2.235126536929951e-05, |
| "loss": 0.0, |
| "step": 422500 |
| }, |
| { |
| "epoch": 0.6752587893392121, |
| "grad_norm": 0.00012516920105554163, |
| "learning_rate": 2.2334859762137362e-05, |
| "loss": 0.0, |
| "step": 423000 |
| }, |
| { |
| "epoch": 0.6760569675772018, |
| "grad_norm": 0.0005767050897702575, |
| "learning_rate": 2.231844261569593e-05, |
| "loss": 0.0, |
| "step": 423500 |
| }, |
| { |
| "epoch": 0.6768551458151915, |
| "grad_norm": 0.00015193522267509252, |
| "learning_rate": 2.2302013955802847e-05, |
| "loss": 0.0, |
| "step": 424000 |
| }, |
| { |
| "epoch": 0.677653324053181, |
| "grad_norm": 0.00024528297944925725, |
| "learning_rate": 2.228557380830385e-05, |
| "loss": 0.0, |
| "step": 424500 |
| }, |
| { |
| "epoch": 0.6784515022911707, |
| "grad_norm": 0.00018646153330337256, |
| "learning_rate": 2.226912219906276e-05, |
| "loss": 0.0, |
| "step": 425000 |
| }, |
| { |
| "epoch": 0.6792496805291602, |
| "grad_norm": 0.00017168234626296908, |
| "learning_rate": 2.225265915396142e-05, |
| "loss": 0.0, |
| "step": 425500 |
| }, |
| { |
| "epoch": 0.6800478587671499, |
| "grad_norm": 0.00011930393520742655, |
| "learning_rate": 2.2236184698899667e-05, |
| "loss": 0.0, |
| "step": 426000 |
| }, |
| { |
| "epoch": 0.6808460370051395, |
| "grad_norm": 0.0001393697311868891, |
| "learning_rate": 2.2219698859795292e-05, |
| "loss": 0.0, |
| "step": 426500 |
| }, |
| { |
| "epoch": 0.6816442152431291, |
| "grad_norm": 0.00014085869770497084, |
| "learning_rate": 2.220320166258399e-05, |
| "loss": 0.0, |
| "step": 427000 |
| }, |
| { |
| "epoch": 0.6824423934811187, |
| "grad_norm": 0.00041550418245606124, |
| "learning_rate": 2.2186693133219322e-05, |
| "loss": 0.0, |
| "step": 427500 |
| }, |
| { |
| "epoch": 0.6832405717191083, |
| "grad_norm": 0.00016101829532999545, |
| "learning_rate": 2.217017329767269e-05, |
| "loss": 0.0, |
| "step": 428000 |
| }, |
| { |
| "epoch": 0.6840387499570979, |
| "grad_norm": 2.820543050765991, |
| "learning_rate": 2.2153642181933264e-05, |
| "loss": 0.0, |
| "step": 428500 |
| }, |
| { |
| "epoch": 0.6848369281950876, |
| "grad_norm": 0.00014117424143478274, |
| "learning_rate": 2.213709981200798e-05, |
| "loss": 0.0, |
| "step": 429000 |
| }, |
| { |
| "epoch": 0.6856351064330771, |
| "grad_norm": 0.0002793257881421596, |
| "learning_rate": 2.2120546213921473e-05, |
| "loss": 0.0, |
| "step": 429500 |
| }, |
| { |
| "epoch": 0.6864332846710668, |
| "grad_norm": 0.0001950536243384704, |
| "learning_rate": 2.2103981413716033e-05, |
| "loss": 0.0, |
| "step": 430000 |
| }, |
| { |
| "epoch": 0.6872314629090563, |
| "grad_norm": 0.0003645491087809205, |
| "learning_rate": 2.2087405437451577e-05, |
| "loss": 0.0, |
| "step": 430500 |
| }, |
| { |
| "epoch": 0.688029641147046, |
| "grad_norm": 0.00022027833620086312, |
| "learning_rate": 2.2070818311205615e-05, |
| "loss": 0.0, |
| "step": 431000 |
| }, |
| { |
| "epoch": 0.6888278193850356, |
| "grad_norm": 0.001554289017803967, |
| "learning_rate": 2.205422006107318e-05, |
| "loss": 0.0, |
| "step": 431500 |
| }, |
| { |
| "epoch": 0.6896259976230252, |
| "grad_norm": 0.00027029041666537523, |
| "learning_rate": 2.2037610713166828e-05, |
| "loss": 0.0, |
| "step": 432000 |
| }, |
| { |
| "epoch": 0.6904241758610148, |
| "grad_norm": 0.00016059460176620632, |
| "learning_rate": 2.202099029361655e-05, |
| "loss": 0.0, |
| "step": 432500 |
| }, |
| { |
| "epoch": 0.6912223540990045, |
| "grad_norm": 0.0009365231380797923, |
| "learning_rate": 2.2004358828569774e-05, |
| "loss": 0.0, |
| "step": 433000 |
| }, |
| { |
| "epoch": 0.692020532336994, |
| "grad_norm": 0.0001574302586959675, |
| "learning_rate": 2.1987716344191296e-05, |
| "loss": 0.0, |
| "step": 433500 |
| }, |
| { |
| "epoch": 0.6928187105749837, |
| "grad_norm": 0.00016089307609945536, |
| "learning_rate": 2.197106286666324e-05, |
| "loss": 0.0, |
| "step": 434000 |
| }, |
| { |
| "epoch": 0.6936168888129732, |
| "grad_norm": 0.0001953535247594118, |
| "learning_rate": 2.1954398422185052e-05, |
| "loss": 0.0, |
| "step": 434500 |
| }, |
| { |
| "epoch": 0.6944150670509629, |
| "grad_norm": 0.0003318938543088734, |
| "learning_rate": 2.1937723036973396e-05, |
| "loss": 0.0, |
| "step": 435000 |
| }, |
| { |
| "epoch": 0.6952132452889525, |
| "grad_norm": 0.0007233397336676717, |
| "learning_rate": 2.1921036737262177e-05, |
| "loss": 0.0, |
| "step": 435500 |
| }, |
| { |
| "epoch": 0.6960114235269421, |
| "grad_norm": 8.286305092042312e-05, |
| "learning_rate": 2.1904339549302448e-05, |
| "loss": 0.0, |
| "step": 436000 |
| }, |
| { |
| "epoch": 0.6968096017649317, |
| "grad_norm": 7.818207814125344e-05, |
| "learning_rate": 2.188763149936241e-05, |
| "loss": 0.0, |
| "step": 436500 |
| }, |
| { |
| "epoch": 0.6976077800029213, |
| "grad_norm": 0.0001744274777593091, |
| "learning_rate": 2.1870912613727345e-05, |
| "loss": 0.0, |
| "step": 437000 |
| }, |
| { |
| "epoch": 0.6984059582409109, |
| "grad_norm": 0.0002287587121827528, |
| "learning_rate": 2.185418291869958e-05, |
| "loss": 0.0, |
| "step": 437500 |
| }, |
| { |
| "epoch": 0.6992041364789006, |
| "grad_norm": 0.00016285310266539454, |
| "learning_rate": 2.1837442440598445e-05, |
| "loss": 0.0, |
| "step": 438000 |
| }, |
| { |
| "epoch": 0.7000023147168901, |
| "grad_norm": 0.0001769603113643825, |
| "learning_rate": 2.1820691205760242e-05, |
| "loss": 0.0, |
| "step": 438500 |
| }, |
| { |
| "epoch": 0.7008004929548798, |
| "grad_norm": 0.00014560433919541538, |
| "learning_rate": 2.1803929240538193e-05, |
| "loss": 0.0, |
| "step": 439000 |
| }, |
| { |
| "epoch": 0.7015986711928694, |
| "grad_norm": 0.00030399439856410027, |
| "learning_rate": 2.1787156571302395e-05, |
| "loss": 0.0, |
| "step": 439500 |
| }, |
| { |
| "epoch": 0.702396849430859, |
| "grad_norm": 0.0008793527958914638, |
| "learning_rate": 2.177037322443979e-05, |
| "loss": 0.0, |
| "step": 440000 |
| }, |
| { |
| "epoch": 0.7031950276688487, |
| "grad_norm": 0.00017075143114198, |
| "learning_rate": 2.1753579226354126e-05, |
| "loss": 0.0, |
| "step": 440500 |
| }, |
| { |
| "epoch": 0.7039932059068382, |
| "grad_norm": 9.666175174061209e-05, |
| "learning_rate": 2.1736774603465886e-05, |
| "loss": 0.0, |
| "step": 441000 |
| }, |
| { |
| "epoch": 0.7047913841448279, |
| "grad_norm": 0.0006323313573375344, |
| "learning_rate": 2.1719959382212294e-05, |
| "loss": 0.0, |
| "step": 441500 |
| }, |
| { |
| "epoch": 0.7055895623828174, |
| "grad_norm": 38.205810546875, |
| "learning_rate": 2.1703133589047222e-05, |
| "loss": 0.0, |
| "step": 442000 |
| }, |
| { |
| "epoch": 0.7063877406208071, |
| "grad_norm": 0.000583072891458869, |
| "learning_rate": 2.16862972504412e-05, |
| "loss": 0.0, |
| "step": 442500 |
| }, |
| { |
| "epoch": 0.7071859188587967, |
| "grad_norm": 0.002959677018225193, |
| "learning_rate": 2.166945039288132e-05, |
| "loss": 0.0, |
| "step": 443000 |
| }, |
| { |
| "epoch": 0.7079840970967863, |
| "grad_norm": 0.00010426915105199441, |
| "learning_rate": 2.165259304287125e-05, |
| "loss": 0.0, |
| "step": 443500 |
| }, |
| { |
| "epoch": 0.7087822753347759, |
| "grad_norm": 0.0002889011229854077, |
| "learning_rate": 2.1635725226931143e-05, |
| "loss": 0.0, |
| "step": 444000 |
| }, |
| { |
| "epoch": 0.7095804535727656, |
| "grad_norm": 0.000344561121892184, |
| "learning_rate": 2.1618846971597636e-05, |
| "loss": 0.0, |
| "step": 444500 |
| }, |
| { |
| "epoch": 0.7103786318107551, |
| "grad_norm": 0.0001297101262025535, |
| "learning_rate": 2.160195830342377e-05, |
| "loss": 0.0, |
| "step": 445000 |
| }, |
| { |
| "epoch": 0.7111768100487448, |
| "grad_norm": 0.05745575204491615, |
| "learning_rate": 2.1585059248978978e-05, |
| "loss": 0.0, |
| "step": 445500 |
| }, |
| { |
| "epoch": 0.7119749882867343, |
| "grad_norm": 0.0002907444431912154, |
| "learning_rate": 2.156814983484904e-05, |
| "loss": 0.0, |
| "step": 446000 |
| }, |
| { |
| "epoch": 0.712773166524724, |
| "grad_norm": 0.0002803723618853837, |
| "learning_rate": 2.1551230087636018e-05, |
| "loss": 0.0, |
| "step": 446500 |
| }, |
| { |
| "epoch": 0.7135713447627136, |
| "grad_norm": 0.00019375992997083813, |
| "learning_rate": 2.1534300033958244e-05, |
| "loss": 0.0, |
| "step": 447000 |
| }, |
| { |
| "epoch": 0.7143695230007032, |
| "grad_norm": 0.000927963585127145, |
| "learning_rate": 2.151735970045025e-05, |
| "loss": 0.0, |
| "step": 447500 |
| }, |
| { |
| "epoch": 0.7151677012386928, |
| "grad_norm": 0.00018430485215503722, |
| "learning_rate": 2.150040911376276e-05, |
| "loss": 0.0, |
| "step": 448000 |
| }, |
| { |
| "epoch": 0.7159658794766824, |
| "grad_norm": 0.0001356995344394818, |
| "learning_rate": 2.1483448300562604e-05, |
| "loss": 0.0, |
| "step": 448500 |
| }, |
| { |
| "epoch": 0.716764057714672, |
| "grad_norm": 0.0004225323209539056, |
| "learning_rate": 2.1466477287532726e-05, |
| "loss": 0.0, |
| "step": 449000 |
| }, |
| { |
| "epoch": 0.7175622359526617, |
| "grad_norm": 0.00017753643624018878, |
| "learning_rate": 2.1449496101372094e-05, |
| "loss": 0.0, |
| "step": 449500 |
| }, |
| { |
| "epoch": 0.7183604141906512, |
| "grad_norm": 0.0015126933576539159, |
| "learning_rate": 2.1432504768795695e-05, |
| "loss": 0.0, |
| "step": 450000 |
| }, |
| { |
| "epoch": 0.7183604141906512, |
| "eval_loss": 2.1175588699406944e-05, |
| "eval_runtime": 22089.4767, |
| "eval_samples_per_second": 100.831, |
| "eval_steps_per_second": 3.151, |
| "step": 450000 |
| }, |
| { |
| "epoch": 0.7191585924286409, |
| "grad_norm": 0.00032660740544088185, |
| "learning_rate": 2.1415503316534478e-05, |
| "loss": 0.0, |
| "step": 450500 |
| }, |
| { |
| "epoch": 0.7199567706666304, |
| "grad_norm": 0.0006630943971686065, |
| "learning_rate": 2.1398491771335297e-05, |
| "loss": 0.0, |
| "step": 451000 |
| }, |
| { |
| "epoch": 0.7207549489046201, |
| "grad_norm": 0.0002114923991030082, |
| "learning_rate": 2.1381470159960905e-05, |
| "loss": 0.0, |
| "step": 451500 |
| }, |
| { |
| "epoch": 0.7215531271426097, |
| "grad_norm": 0.0004499349743127823, |
| "learning_rate": 2.1364438509189877e-05, |
| "loss": 0.0, |
| "step": 452000 |
| }, |
| { |
| "epoch": 0.7223513053805993, |
| "grad_norm": 0.0001622430863790214, |
| "learning_rate": 2.134739684581659e-05, |
| "loss": 0.0, |
| "step": 452500 |
| }, |
| { |
| "epoch": 0.7231494836185889, |
| "grad_norm": 0.0010711727663874626, |
| "learning_rate": 2.133034519665117e-05, |
| "loss": 0.0, |
| "step": 453000 |
| }, |
| { |
| "epoch": 0.7239476618565786, |
| "grad_norm": 0.0011754089500755072, |
| "learning_rate": 2.1313283588519452e-05, |
| "loss": 0.0, |
| "step": 453500 |
| }, |
| { |
| "epoch": 0.7247458400945681, |
| "grad_norm": 0.00015875005919951946, |
| "learning_rate": 2.129621204826294e-05, |
| "loss": 0.0, |
| "step": 454000 |
| }, |
| { |
| "epoch": 0.7255440183325578, |
| "grad_norm": 291.006103515625, |
| "learning_rate": 2.127913060273875e-05, |
| "loss": 0.0, |
| "step": 454500 |
| }, |
| { |
| "epoch": 0.7263421965705474, |
| "grad_norm": 0.0004946871194988489, |
| "learning_rate": 2.1262039278819618e-05, |
| "loss": 0.0, |
| "step": 455000 |
| }, |
| { |
| "epoch": 0.727140374808537, |
| "grad_norm": 0.00020070774189662188, |
| "learning_rate": 2.124493810339378e-05, |
| "loss": 0.0, |
| "step": 455500 |
| }, |
| { |
| "epoch": 0.7279385530465267, |
| "grad_norm": 0.0009217039914801717, |
| "learning_rate": 2.1227827103364987e-05, |
| "loss": 0.0, |
| "step": 456000 |
| }, |
| { |
| "epoch": 0.7287367312845162, |
| "grad_norm": 49.21879577636719, |
| "learning_rate": 2.121070630565245e-05, |
| "loss": 0.0, |
| "step": 456500 |
| }, |
| { |
| "epoch": 0.7295349095225059, |
| "grad_norm": 0.00036531256046146154, |
| "learning_rate": 2.119357573719078e-05, |
| "loss": 0.0, |
| "step": 457000 |
| }, |
| { |
| "epoch": 0.7303330877604954, |
| "grad_norm": 0.006041168235242367, |
| "learning_rate": 2.1176435424929984e-05, |
| "loss": 0.0, |
| "step": 457500 |
| }, |
| { |
| "epoch": 0.7311312659984851, |
| "grad_norm": 0.00016016997687984258, |
| "learning_rate": 2.115928539583538e-05, |
| "loss": 0.0, |
| "step": 458000 |
| }, |
| { |
| "epoch": 0.7319294442364747, |
| "grad_norm": 9.255438635591418e-05, |
| "learning_rate": 2.1142125676887573e-05, |
| "loss": 0.0, |
| "step": 458500 |
| }, |
| { |
| "epoch": 0.7327276224744643, |
| "grad_norm": 13.3391695022583, |
| "learning_rate": 2.1124956295082408e-05, |
| "loss": 0.0, |
| "step": 459000 |
| }, |
| { |
| "epoch": 0.7335258007124539, |
| "grad_norm": 0.002709623659029603, |
| "learning_rate": 2.110777727743095e-05, |
| "loss": 0.0, |
| "step": 459500 |
| }, |
| { |
| "epoch": 0.7343239789504435, |
| "grad_norm": 0.0001365782372886315, |
| "learning_rate": 2.1090588650959407e-05, |
| "loss": 0.0, |
| "step": 460000 |
| }, |
| { |
| "epoch": 0.7351221571884331, |
| "grad_norm": 0.0019237701781094074, |
| "learning_rate": 2.107339044270911e-05, |
| "loss": 0.0, |
| "step": 460500 |
| }, |
| { |
| "epoch": 0.7359203354264228, |
| "grad_norm": 0.00011767734395107254, |
| "learning_rate": 2.105618267973646e-05, |
| "loss": 0.0, |
| "step": 461000 |
| }, |
| { |
| "epoch": 0.7367185136644123, |
| "grad_norm": 0.00024400111578870565, |
| "learning_rate": 2.10389653891129e-05, |
| "loss": 0.0, |
| "step": 461500 |
| }, |
| { |
| "epoch": 0.737516691902402, |
| "grad_norm": 9.57796219154261e-05, |
| "learning_rate": 2.1021738597924846e-05, |
| "loss": 0.0, |
| "step": 462000 |
| }, |
| { |
| "epoch": 0.7383148701403915, |
| "grad_norm": 0.0005409715231508017, |
| "learning_rate": 2.1004502333273672e-05, |
| "loss": 0.0, |
| "step": 462500 |
| }, |
| { |
| "epoch": 0.7391130483783812, |
| "grad_norm": 0.3090362250804901, |
| "learning_rate": 2.0987256622275653e-05, |
| "loss": 0.0, |
| "step": 463000 |
| }, |
| { |
| "epoch": 0.7399112266163708, |
| "grad_norm": 0.00047693413216620684, |
| "learning_rate": 2.0970001492061924e-05, |
| "loss": 0.0, |
| "step": 463500 |
| }, |
| { |
| "epoch": 0.7407094048543604, |
| "grad_norm": 0.00021121487952768803, |
| "learning_rate": 2.095273696977844e-05, |
| "loss": 0.0, |
| "step": 464000 |
| }, |
| { |
| "epoch": 0.74150758309235, |
| "grad_norm": 0.00015116293798200786, |
| "learning_rate": 2.093546308258593e-05, |
| "loss": 0.0, |
| "step": 464500 |
| }, |
| { |
| "epoch": 0.7423057613303397, |
| "grad_norm": 0.0003296768991276622, |
| "learning_rate": 2.091817985765986e-05, |
| "loss": 0.0, |
| "step": 465000 |
| }, |
| { |
| "epoch": 0.7431039395683292, |
| "grad_norm": 0.000263060734141618, |
| "learning_rate": 2.090088732219038e-05, |
| "loss": 0.0, |
| "step": 465500 |
| }, |
| { |
| "epoch": 0.7439021178063189, |
| "grad_norm": 0.00016778981080278754, |
| "learning_rate": 2.0883585503382285e-05, |
| "loss": 0.0, |
| "step": 466000 |
| }, |
| { |
| "epoch": 0.7447002960443084, |
| "grad_norm": 0.00032316602300852537, |
| "learning_rate": 2.0866274428454993e-05, |
| "loss": 0.0, |
| "step": 466500 |
| }, |
| { |
| "epoch": 0.7454984742822981, |
| "grad_norm": 0.0310523621737957, |
| "learning_rate": 2.0848954124642457e-05, |
| "loss": 0.0, |
| "step": 467000 |
| }, |
| { |
| "epoch": 0.7462966525202877, |
| "grad_norm": 0.00014277735317591578, |
| "learning_rate": 2.0831624619193175e-05, |
| "loss": 0.0, |
| "step": 467500 |
| }, |
| { |
| "epoch": 0.7470948307582773, |
| "grad_norm": 0.0003888442297466099, |
| "learning_rate": 2.0814285939370103e-05, |
| "loss": 0.0, |
| "step": 468000 |
| }, |
| { |
| "epoch": 0.7478930089962669, |
| "grad_norm": 32.5892448425293, |
| "learning_rate": 2.0796938112450635e-05, |
| "loss": 0.0, |
| "step": 468500 |
| }, |
| { |
| "epoch": 0.7486911872342565, |
| "grad_norm": 0.0001689651544438675, |
| "learning_rate": 2.077958116572656e-05, |
| "loss": 0.0, |
| "step": 469000 |
| }, |
| { |
| "epoch": 0.7494893654722461, |
| "grad_norm": 0.0002664696075953543, |
| "learning_rate": 2.076221512650401e-05, |
| "loss": 0.0, |
| "step": 469500 |
| }, |
| { |
| "epoch": 0.7502875437102358, |
| "grad_norm": 0.00024409177422057837, |
| "learning_rate": 2.074484002210342e-05, |
| "loss": 0.0, |
| "step": 470000 |
| }, |
| { |
| "epoch": 0.7510857219482253, |
| "grad_norm": 0.00021594902500510216, |
| "learning_rate": 2.072745587985949e-05, |
| "loss": 0.0, |
| "step": 470500 |
| }, |
| { |
| "epoch": 0.751883900186215, |
| "grad_norm": 0.000743635231629014, |
| "learning_rate": 2.0710062727121142e-05, |
| "loss": 0.0, |
| "step": 471000 |
| }, |
| { |
| "epoch": 0.7526820784242046, |
| "grad_norm": 0.0020941346883773804, |
| "learning_rate": 2.069266059125146e-05, |
| "loss": 0.0, |
| "step": 471500 |
| }, |
| { |
| "epoch": 0.7534802566621942, |
| "grad_norm": 0.00034860908635891974, |
| "learning_rate": 2.0675249499627675e-05, |
| "loss": 0.0, |
| "step": 472000 |
| }, |
| { |
| "epoch": 0.7542784349001839, |
| "grad_norm": 0.00023366471577901393, |
| "learning_rate": 2.06578294796411e-05, |
| "loss": 0.0, |
| "step": 472500 |
| }, |
| { |
| "epoch": 0.7550766131381734, |
| "grad_norm": 0.0005297983298078179, |
| "learning_rate": 2.0640400558697097e-05, |
| "loss": 0.0, |
| "step": 473000 |
| }, |
| { |
| "epoch": 0.7558747913761631, |
| "grad_norm": 0.000132110042613931, |
| "learning_rate": 2.0622962764215024e-05, |
| "loss": 0.0, |
| "step": 473500 |
| }, |
| { |
| "epoch": 0.7566729696141526, |
| "grad_norm": 8.032079495023936e-05, |
| "learning_rate": 2.0605516123628208e-05, |
| "loss": 0.0, |
| "step": 474000 |
| }, |
| { |
| "epoch": 0.7574711478521423, |
| "grad_norm": 9.18265141081065e-05, |
| "learning_rate": 2.0588060664383896e-05, |
| "loss": 0.0, |
| "step": 474500 |
| }, |
| { |
| "epoch": 0.7582693260901319, |
| "grad_norm": 0.04310398921370506, |
| "learning_rate": 2.057059641394319e-05, |
| "loss": 0.0, |
| "step": 475000 |
| }, |
| { |
| "epoch": 0.7590675043281215, |
| "grad_norm": 0.0002098032709909603, |
| "learning_rate": 2.055312339978104e-05, |
| "loss": 0.0, |
| "step": 475500 |
| }, |
| { |
| "epoch": 0.7598656825661111, |
| "grad_norm": 0.00010694513184716925, |
| "learning_rate": 2.0535641649386177e-05, |
| "loss": 0.0, |
| "step": 476000 |
| }, |
| { |
| "epoch": 0.7606638608041008, |
| "grad_norm": 0.0002394245530012995, |
| "learning_rate": 2.0518151190261075e-05, |
| "loss": 0.0, |
| "step": 476500 |
| }, |
| { |
| "epoch": 0.7614620390420903, |
| "grad_norm": 0.00031559134367853403, |
| "learning_rate": 2.0500652049921917e-05, |
| "loss": 0.0, |
| "step": 477000 |
| }, |
| { |
| "epoch": 0.76226021728008, |
| "grad_norm": 0.0530126690864563, |
| "learning_rate": 2.048314425589853e-05, |
| "loss": 0.0, |
| "step": 477500 |
| }, |
| { |
| "epoch": 0.7630583955180695, |
| "grad_norm": 0.0002214965206803754, |
| "learning_rate": 2.046562783573436e-05, |
| "loss": 0.0, |
| "step": 478000 |
| }, |
| { |
| "epoch": 0.7638565737560592, |
| "grad_norm": 0.0003776904777623713, |
| "learning_rate": 2.0448102816986426e-05, |
| "loss": 0.0, |
| "step": 478500 |
| }, |
| { |
| "epoch": 0.7646547519940488, |
| "grad_norm": 0.0002720048651099205, |
| "learning_rate": 2.0430569227225278e-05, |
| "loss": 0.0, |
| "step": 479000 |
| }, |
| { |
| "epoch": 0.7654529302320384, |
| "grad_norm": 0.00013453431893140078, |
| "learning_rate": 2.0413027094034938e-05, |
| "loss": 0.0, |
| "step": 479500 |
| }, |
| { |
| "epoch": 0.766251108470028, |
| "grad_norm": 0.00026411519502289593, |
| "learning_rate": 2.0395476445012888e-05, |
| "loss": 0.0, |
| "step": 480000 |
| }, |
| { |
| "epoch": 0.7670492867080176, |
| "grad_norm": 0.0005124152521602809, |
| "learning_rate": 2.0377917307769987e-05, |
| "loss": 0.0, |
| "step": 480500 |
| }, |
| { |
| "epoch": 0.7678474649460072, |
| "grad_norm": 0.0005802405066788197, |
| "learning_rate": 2.0360349709930456e-05, |
| "loss": 0.0, |
| "step": 481000 |
| }, |
| { |
| "epoch": 0.7686456431839969, |
| "grad_norm": 0.0003442351007834077, |
| "learning_rate": 2.034277367913183e-05, |
| "loss": 0.0, |
| "step": 481500 |
| }, |
| { |
| "epoch": 0.7694438214219864, |
| "grad_norm": 0.01315320935100317, |
| "learning_rate": 2.0325189243024906e-05, |
| "loss": 0.0, |
| "step": 482000 |
| }, |
| { |
| "epoch": 0.7702419996599761, |
| "grad_norm": 0.0007831249386072159, |
| "learning_rate": 2.0307596429273707e-05, |
| "loss": 0.0, |
| "step": 482500 |
| }, |
| { |
| "epoch": 0.7710401778979656, |
| "grad_norm": 0.0002610905794426799, |
| "learning_rate": 2.0289995265555427e-05, |
| "loss": 0.0, |
| "step": 483000 |
| }, |
| { |
| "epoch": 0.7718383561359553, |
| "grad_norm": 0.0001466882531531155, |
| "learning_rate": 2.0272385779560415e-05, |
| "loss": 0.0, |
| "step": 483500 |
| }, |
| { |
| "epoch": 0.7726365343739449, |
| "grad_norm": 0.00014445210399571806, |
| "learning_rate": 2.0254767998992096e-05, |
| "loss": 0.0, |
| "step": 484000 |
| }, |
| { |
| "epoch": 0.7734347126119345, |
| "grad_norm": 0.0008763981168158352, |
| "learning_rate": 2.023714195156695e-05, |
| "loss": 0.0, |
| "step": 484500 |
| }, |
| { |
| "epoch": 0.7742328908499241, |
| "grad_norm": 0.00019388810324016958, |
| "learning_rate": 2.0219507665014458e-05, |
| "loss": 0.0, |
| "step": 485000 |
| }, |
| { |
| "epoch": 0.7750310690879137, |
| "grad_norm": 5447.041015625, |
| "learning_rate": 2.020186516707707e-05, |
| "loss": 0.0, |
| "step": 485500 |
| }, |
| { |
| "epoch": 0.7758292473259033, |
| "grad_norm": 0.00018875622481573373, |
| "learning_rate": 2.0184214485510155e-05, |
| "loss": 0.0, |
| "step": 486000 |
| }, |
| { |
| "epoch": 0.776627425563893, |
| "grad_norm": 0.0003076701541431248, |
| "learning_rate": 2.0166555648081948e-05, |
| "loss": 0.0, |
| "step": 486500 |
| }, |
| { |
| "epoch": 0.7774256038018826, |
| "grad_norm": 0.00028623873367905617, |
| "learning_rate": 2.0148888682573518e-05, |
| "loss": 0.0, |
| "step": 487000 |
| }, |
| { |
| "epoch": 0.7782237820398722, |
| "grad_norm": 0.00033153523690998554, |
| "learning_rate": 2.013121361677873e-05, |
| "loss": 0.0, |
| "step": 487500 |
| }, |
| { |
| "epoch": 0.7790219602778619, |
| "grad_norm": 0.18243126571178436, |
| "learning_rate": 2.011353047850418e-05, |
| "loss": 0.0, |
| "step": 488000 |
| }, |
| { |
| "epoch": 0.7798201385158514, |
| "grad_norm": 0.0001707931951386854, |
| "learning_rate": 2.009583929556917e-05, |
| "loss": 0.0, |
| "step": 488500 |
| }, |
| { |
| "epoch": 0.7806183167538411, |
| "grad_norm": 0.00017946858133655041, |
| "learning_rate": 2.0078140095805653e-05, |
| "loss": 0.0, |
| "step": 489000 |
| }, |
| { |
| "epoch": 0.7814164949918306, |
| "grad_norm": 0.00014652337995357811, |
| "learning_rate": 2.0060432907058204e-05, |
| "loss": 0.0, |
| "step": 489500 |
| }, |
| { |
| "epoch": 0.7822146732298203, |
| "grad_norm": 0.0001990313030546531, |
| "learning_rate": 2.0042717757183958e-05, |
| "loss": 0.0, |
| "step": 490000 |
| }, |
| { |
| "epoch": 0.7830128514678099, |
| "grad_norm": 0.0002826680720318109, |
| "learning_rate": 2.002499467405258e-05, |
| "loss": 0.0, |
| "step": 490500 |
| }, |
| { |
| "epoch": 0.7838110297057995, |
| "grad_norm": 0.00021341089450288564, |
| "learning_rate": 2.0007263685546207e-05, |
| "loss": 0.0, |
| "step": 491000 |
| }, |
| { |
| "epoch": 0.7846092079437891, |
| "grad_norm": 0.0002723548677749932, |
| "learning_rate": 1.998952481955942e-05, |
| "loss": 0.0, |
| "step": 491500 |
| }, |
| { |
| "epoch": 0.7854073861817787, |
| "grad_norm": 0.00017047197616193444, |
| "learning_rate": 1.9971778103999194e-05, |
| "loss": 0.0, |
| "step": 492000 |
| }, |
| { |
| "epoch": 0.7862055644197683, |
| "grad_norm": 8.363970118807629e-05, |
| "learning_rate": 1.9954023566784848e-05, |
| "loss": 0.0, |
| "step": 492500 |
| }, |
| { |
| "epoch": 0.787003742657758, |
| "grad_norm": 0.00022111626458354294, |
| "learning_rate": 1.9936261235848014e-05, |
| "loss": 0.0, |
| "step": 493000 |
| }, |
| { |
| "epoch": 0.7878019208957475, |
| "grad_norm": 0.00035100660170428455, |
| "learning_rate": 1.9918491139132573e-05, |
| "loss": 0.0, |
| "step": 493500 |
| }, |
| { |
| "epoch": 0.7886000991337372, |
| "grad_norm": 0.00016012179548852146, |
| "learning_rate": 1.990071330459463e-05, |
| "loss": 0.0, |
| "step": 494000 |
| }, |
| { |
| "epoch": 0.7893982773717267, |
| "grad_norm": 0.00026644032914191484, |
| "learning_rate": 1.9882927760202464e-05, |
| "loss": 0.0, |
| "step": 494500 |
| }, |
| { |
| "epoch": 0.7901964556097164, |
| "grad_norm": 0.0001790735695976764, |
| "learning_rate": 1.9865134533936485e-05, |
| "loss": 0.0, |
| "step": 495000 |
| }, |
| { |
| "epoch": 0.790994633847706, |
| "grad_norm": 0.0007223137654364109, |
| "learning_rate": 1.9847333653789186e-05, |
| "loss": 0.0, |
| "step": 495500 |
| }, |
| { |
| "epoch": 0.7917928120856956, |
| "grad_norm": 0.00024101993767544627, |
| "learning_rate": 1.9829525147765096e-05, |
| "loss": 0.0, |
| "step": 496000 |
| }, |
| { |
| "epoch": 0.7925909903236852, |
| "grad_norm": 0.00025261842529289424, |
| "learning_rate": 1.9811709043880752e-05, |
| "loss": 0.0, |
| "step": 496500 |
| }, |
| { |
| "epoch": 0.7933891685616749, |
| "grad_norm": 0.00024634183500893414, |
| "learning_rate": 1.9793885370164632e-05, |
| "loss": 0.0, |
| "step": 497000 |
| }, |
| { |
| "epoch": 0.7941873467996644, |
| "grad_norm": 0.0001890936982817948, |
| "learning_rate": 1.9776054154657133e-05, |
| "loss": 0.0, |
| "step": 497500 |
| }, |
| { |
| "epoch": 0.7949855250376541, |
| "grad_norm": 0.000774562475271523, |
| "learning_rate": 1.9758215425410505e-05, |
| "loss": 0.0, |
| "step": 498000 |
| }, |
| { |
| "epoch": 0.7957837032756436, |
| "grad_norm": 0.00032418197952210903, |
| "learning_rate": 1.974036921048884e-05, |
| "loss": 0.0, |
| "step": 498500 |
| }, |
| { |
| "epoch": 0.7965818815136333, |
| "grad_norm": 0.0001678819244261831, |
| "learning_rate": 1.9722515537967983e-05, |
| "loss": 0.0, |
| "step": 499000 |
| }, |
| { |
| "epoch": 0.7973800597516228, |
| "grad_norm": 0.00012998198508284986, |
| "learning_rate": 1.970465443593552e-05, |
| "loss": 0.0, |
| "step": 499500 |
| }, |
| { |
| "epoch": 0.7981782379896125, |
| "grad_norm": 0.00015611288836225867, |
| "learning_rate": 1.9686785932490737e-05, |
| "loss": 0.0, |
| "step": 500000 |
| }, |
| { |
| "epoch": 0.7989764162276021, |
| "grad_norm": 9.851302456809208e-05, |
| "learning_rate": 1.9668910055744534e-05, |
| "loss": 0.0, |
| "step": 500500 |
| }, |
| { |
| "epoch": 0.7997745944655917, |
| "grad_norm": 1.1728882789611816, |
| "learning_rate": 1.9651026833819453e-05, |
| "loss": 0.0, |
| "step": 501000 |
| }, |
| { |
| "epoch": 0.8005727727035813, |
| "grad_norm": 0.00021649766131304204, |
| "learning_rate": 1.963313629484955e-05, |
| "loss": 0.0, |
| "step": 501500 |
| }, |
| { |
| "epoch": 0.801370950941571, |
| "grad_norm": 0.0012857463443651795, |
| "learning_rate": 1.9615238466980426e-05, |
| "loss": 0.0, |
| "step": 502000 |
| }, |
| { |
| "epoch": 0.8021691291795606, |
| "grad_norm": 0.001131516881287098, |
| "learning_rate": 1.9597333378369123e-05, |
| "loss": 0.0, |
| "step": 502500 |
| }, |
| { |
| "epoch": 0.8029673074175502, |
| "grad_norm": 0.0011782856890931726, |
| "learning_rate": 1.957942105718412e-05, |
| "loss": 0.0, |
| "step": 503000 |
| }, |
| { |
| "epoch": 0.8037654856555398, |
| "grad_norm": 0.0001663925067987293, |
| "learning_rate": 1.9561501531605272e-05, |
| "loss": 0.0, |
| "step": 503500 |
| }, |
| { |
| "epoch": 0.8045636638935294, |
| "grad_norm": 0.00026339280884712934, |
| "learning_rate": 1.9543574829823768e-05, |
| "loss": 0.0, |
| "step": 504000 |
| }, |
| { |
| "epoch": 0.8053618421315191, |
| "grad_norm": 0.0003007679770234972, |
| "learning_rate": 1.952564098004208e-05, |
| "loss": 0.0, |
| "step": 504500 |
| }, |
| { |
| "epoch": 0.8061600203695086, |
| "grad_norm": 0.00020266381034161896, |
| "learning_rate": 1.9507700010473938e-05, |
| "loss": 0.0, |
| "step": 505000 |
| }, |
| { |
| "epoch": 0.8069581986074983, |
| "grad_norm": 0.0002320464700460434, |
| "learning_rate": 1.9489751949344265e-05, |
| "loss": 0.0, |
| "step": 505500 |
| }, |
| { |
| "epoch": 0.8077563768454878, |
| "grad_norm": 0.00040771221392787993, |
| "learning_rate": 1.9471796824889136e-05, |
| "loss": 0.0, |
| "step": 506000 |
| }, |
| { |
| "epoch": 0.8085545550834775, |
| "grad_norm": 0.00015795578656252474, |
| "learning_rate": 1.9453834665355747e-05, |
| "loss": 0.0, |
| "step": 506500 |
| }, |
| { |
| "epoch": 0.8093527333214671, |
| "grad_norm": 0.00014850639854557812, |
| "learning_rate": 1.9435865499002358e-05, |
| "loss": 0.0, |
| "step": 507000 |
| }, |
| { |
| "epoch": 0.8101509115594567, |
| "grad_norm": 0.023478705435991287, |
| "learning_rate": 1.941788935409825e-05, |
| "loss": 0.0, |
| "step": 507500 |
| }, |
| { |
| "epoch": 0.8109490897974463, |
| "grad_norm": 9.967546793632209e-05, |
| "learning_rate": 1.9399906258923688e-05, |
| "loss": 0.0, |
| "step": 508000 |
| }, |
| { |
| "epoch": 0.811747268035436, |
| "grad_norm": 0.013064282946288586, |
| "learning_rate": 1.938191624176987e-05, |
| "loss": 0.0, |
| "step": 508500 |
| }, |
| { |
| "epoch": 0.8125454462734255, |
| "grad_norm": 0.0002515662636142224, |
| "learning_rate": 1.9363919330938877e-05, |
| "loss": 0.0, |
| "step": 509000 |
| }, |
| { |
| "epoch": 0.8133436245114152, |
| "grad_norm": 9.188640251522884e-05, |
| "learning_rate": 1.9345915554743648e-05, |
| "loss": 0.0, |
| "step": 509500 |
| }, |
| { |
| "epoch": 0.8141418027494047, |
| "grad_norm": 0.0002370925503782928, |
| "learning_rate": 1.9327904941507905e-05, |
| "loss": 0.0, |
| "step": 510000 |
| }, |
| { |
| "epoch": 0.8149399809873944, |
| "grad_norm": 0.0002923431165982038, |
| "learning_rate": 1.9309887519566138e-05, |
| "loss": 0.0, |
| "step": 510500 |
| }, |
| { |
| "epoch": 0.815738159225384, |
| "grad_norm": 0.0011024456471204758, |
| "learning_rate": 1.9291863317263552e-05, |
| "loss": 0.0, |
| "step": 511000 |
| }, |
| { |
| "epoch": 0.8165363374633736, |
| "grad_norm": 0.000260900822468102, |
| "learning_rate": 1.9273832362956013e-05, |
| "loss": 0.0, |
| "step": 511500 |
| }, |
| { |
| "epoch": 0.8173345157013632, |
| "grad_norm": 0.0003531461698003113, |
| "learning_rate": 1.9255794685010005e-05, |
| "loss": 0.0, |
| "step": 512000 |
| }, |
| { |
| "epoch": 0.8181326939393528, |
| "grad_norm": 0.00018732767784968019, |
| "learning_rate": 1.9237750311802607e-05, |
| "loss": 0.0, |
| "step": 512500 |
| }, |
| { |
| "epoch": 0.8189308721773424, |
| "grad_norm": 0.000320764520438388, |
| "learning_rate": 1.9219699271721395e-05, |
| "loss": 0.0, |
| "step": 513000 |
| }, |
| { |
| "epoch": 0.8197290504153321, |
| "grad_norm": 0.0001915542088681832, |
| "learning_rate": 1.920164159316448e-05, |
| "loss": 0.0, |
| "step": 513500 |
| }, |
| { |
| "epoch": 0.8205272286533216, |
| "grad_norm": 0.0005298721953295171, |
| "learning_rate": 1.9183577304540377e-05, |
| "loss": 0.0, |
| "step": 514000 |
| }, |
| { |
| "epoch": 0.8213254068913113, |
| "grad_norm": 0.000173246517078951, |
| "learning_rate": 1.916550643426803e-05, |
| "loss": 0.0, |
| "step": 514500 |
| }, |
| { |
| "epoch": 0.8221235851293008, |
| "grad_norm": 0.00034074197174049914, |
| "learning_rate": 1.9147429010776717e-05, |
| "loss": 0.0, |
| "step": 515000 |
| }, |
| { |
| "epoch": 0.8229217633672905, |
| "grad_norm": 0.0003810925700236112, |
| "learning_rate": 1.9129345062506034e-05, |
| "loss": 0.0, |
| "step": 515500 |
| }, |
| { |
| "epoch": 0.8237199416052801, |
| "grad_norm": 0.0002107440959662199, |
| "learning_rate": 1.911125461790584e-05, |
| "loss": 0.0, |
| "step": 516000 |
| }, |
| { |
| "epoch": 0.8245181198432697, |
| "grad_norm": 0.00023204906028695405, |
| "learning_rate": 1.9093157705436212e-05, |
| "loss": 0.0, |
| "step": 516500 |
| }, |
| { |
| "epoch": 0.8253162980812593, |
| "grad_norm": 0.00024208931426983327, |
| "learning_rate": 1.9075054353567416e-05, |
| "loss": 0.0, |
| "step": 517000 |
| }, |
| { |
| "epoch": 0.826114476319249, |
| "grad_norm": 0.00014883586845826358, |
| "learning_rate": 1.905694459077982e-05, |
| "loss": 0.0, |
| "step": 517500 |
| }, |
| { |
| "epoch": 0.8269126545572386, |
| "grad_norm": 0.017815813422203064, |
| "learning_rate": 1.9038828445563912e-05, |
| "loss": 0.0, |
| "step": 518000 |
| }, |
| { |
| "epoch": 0.8277108327952282, |
| "grad_norm": 0.0003795297525357455, |
| "learning_rate": 1.902070594642019e-05, |
| "loss": 0.0, |
| "step": 518500 |
| }, |
| { |
| "epoch": 0.8285090110332178, |
| "grad_norm": 0.00012054865510435775, |
| "learning_rate": 1.9002577121859175e-05, |
| "loss": 0.0, |
| "step": 519000 |
| }, |
| { |
| "epoch": 0.8293071892712074, |
| "grad_norm": 5.091236591339111, |
| "learning_rate": 1.8984442000401316e-05, |
| "loss": 0.0, |
| "step": 519500 |
| }, |
| { |
| "epoch": 0.8301053675091971, |
| "grad_norm": 0.00017892369942273945, |
| "learning_rate": 1.8966300610576983e-05, |
| "loss": 0.0, |
| "step": 520000 |
| }, |
| { |
| "epoch": 0.8309035457471866, |
| "grad_norm": 0.016385966911911964, |
| "learning_rate": 1.8948152980926404e-05, |
| "loss": 0.0, |
| "step": 520500 |
| }, |
| { |
| "epoch": 0.8317017239851763, |
| "grad_norm": 0.004711247514933348, |
| "learning_rate": 1.892999913999962e-05, |
| "loss": 0.0, |
| "step": 521000 |
| }, |
| { |
| "epoch": 0.8324999022231658, |
| "grad_norm": 0.0005881072720512748, |
| "learning_rate": 1.8911839116356453e-05, |
| "loss": 0.0, |
| "step": 521500 |
| }, |
| { |
| "epoch": 0.8332980804611555, |
| "grad_norm": 0.013069476932287216, |
| "learning_rate": 1.8893672938566436e-05, |
| "loss": 0.0, |
| "step": 522000 |
| }, |
| { |
| "epoch": 0.834096258699145, |
| "grad_norm": 0.00010810209641931579, |
| "learning_rate": 1.8875500635208797e-05, |
| "loss": 0.0, |
| "step": 522500 |
| }, |
| { |
| "epoch": 0.8348944369371347, |
| "grad_norm": 7.931066647870466e-05, |
| "learning_rate": 1.8857322234872397e-05, |
| "loss": 0.0, |
| "step": 523000 |
| }, |
| { |
| "epoch": 0.8356926151751243, |
| "grad_norm": 0.00013571855379268527, |
| "learning_rate": 1.883913776615569e-05, |
| "loss": 0.0, |
| "step": 523500 |
| }, |
| { |
| "epoch": 0.8364907934131139, |
| "grad_norm": 8.749699190957472e-05, |
| "learning_rate": 1.8820947257666667e-05, |
| "loss": 0.0, |
| "step": 524000 |
| }, |
| { |
| "epoch": 0.8372889716511035, |
| "grad_norm": 100.56954193115234, |
| "learning_rate": 1.8802750738022838e-05, |
| "loss": 0.0, |
| "step": 524500 |
| }, |
| { |
| "epoch": 0.8380871498890932, |
| "grad_norm": 0.0004932993906550109, |
| "learning_rate": 1.8784548235851168e-05, |
| "loss": 0.0, |
| "step": 525000 |
| }, |
| { |
| "epoch": 0.8388853281270827, |
| "grad_norm": 9.414002124685794e-05, |
| "learning_rate": 1.8766339779788005e-05, |
| "loss": 0.0, |
| "step": 525500 |
| }, |
| { |
| "epoch": 0.8396835063650724, |
| "grad_norm": 217.5888671875, |
| "learning_rate": 1.87481253984791e-05, |
| "loss": 0.0, |
| "step": 526000 |
| }, |
| { |
| "epoch": 0.8404816846030619, |
| "grad_norm": 0.0001938005443662405, |
| "learning_rate": 1.8729905120579513e-05, |
| "loss": 0.0, |
| "step": 526500 |
| }, |
| { |
| "epoch": 0.8412798628410516, |
| "grad_norm": 7.117674977052957e-05, |
| "learning_rate": 1.871167897475357e-05, |
| "loss": 0.0, |
| "step": 527000 |
| }, |
| { |
| "epoch": 0.8420780410790412, |
| "grad_norm": 0.0006304022972472012, |
| "learning_rate": 1.8693446989674846e-05, |
| "loss": 0.0, |
| "step": 527500 |
| }, |
| { |
| "epoch": 0.8428762193170308, |
| "grad_norm": 0.00013564492110162973, |
| "learning_rate": 1.8675209194026088e-05, |
| "loss": 0.0, |
| "step": 528000 |
| }, |
| { |
| "epoch": 0.8436743975550204, |
| "grad_norm": 372.4085388183594, |
| "learning_rate": 1.8656965616499194e-05, |
| "loss": 0.0, |
| "step": 528500 |
| }, |
| { |
| "epoch": 0.84447257579301, |
| "grad_norm": 0.031750548630952835, |
| "learning_rate": 1.8638716285795146e-05, |
| "loss": 0.0, |
| "step": 529000 |
| }, |
| { |
| "epoch": 0.8452707540309996, |
| "grad_norm": 0.00014481242396868765, |
| "learning_rate": 1.8620461230623994e-05, |
| "loss": 0.0, |
| "step": 529500 |
| }, |
| { |
| "epoch": 0.8460689322689893, |
| "grad_norm": 0.0007552816532552242, |
| "learning_rate": 1.8602200479704776e-05, |
| "loss": 0.0, |
| "step": 530000 |
| }, |
| { |
| "epoch": 0.8468671105069788, |
| "grad_norm": 0.00011971026106039062, |
| "learning_rate": 1.858393406176551e-05, |
| "loss": 0.0, |
| "step": 530500 |
| }, |
| { |
| "epoch": 0.8476652887449685, |
| "grad_norm": 9.909499931382015e-05, |
| "learning_rate": 1.8565662005543106e-05, |
| "loss": 0.0, |
| "step": 531000 |
| }, |
| { |
| "epoch": 0.848463466982958, |
| "grad_norm": 0.0002251157711725682, |
| "learning_rate": 1.8547384339783368e-05, |
| "loss": 0.0, |
| "step": 531500 |
| }, |
| { |
| "epoch": 0.8492616452209477, |
| "grad_norm": 0.00016564581892453134, |
| "learning_rate": 1.852910109324091e-05, |
| "loss": 0.0, |
| "step": 532000 |
| }, |
| { |
| "epoch": 0.8500598234589373, |
| "grad_norm": 0.00014557143731508404, |
| "learning_rate": 1.8510812294679122e-05, |
| "loss": 0.0, |
| "step": 532500 |
| }, |
| { |
| "epoch": 0.8508580016969269, |
| "grad_norm": 0.00032126580481417477, |
| "learning_rate": 1.849251797287015e-05, |
| "loss": 0.0, |
| "step": 533000 |
| }, |
| { |
| "epoch": 0.8516561799349166, |
| "grad_norm": 0.0002472183550707996, |
| "learning_rate": 1.8474218156594805e-05, |
| "loss": 0.0, |
| "step": 533500 |
| }, |
| { |
| "epoch": 0.8524543581729062, |
| "grad_norm": 0.0003146003873553127, |
| "learning_rate": 1.8455912874642562e-05, |
| "loss": 0.0, |
| "step": 534000 |
| }, |
| { |
| "epoch": 0.8532525364108958, |
| "grad_norm": 0.00025272014318034053, |
| "learning_rate": 1.8437602155811473e-05, |
| "loss": 0.0, |
| "step": 534500 |
| }, |
| { |
| "epoch": 0.8540507146488854, |
| "grad_norm": 0.00020710949320346117, |
| "learning_rate": 1.8419286028908164e-05, |
| "loss": 0.0, |
| "step": 535000 |
| }, |
| { |
| "epoch": 0.854848892886875, |
| "grad_norm": 0.00043446040945127606, |
| "learning_rate": 1.840096452274776e-05, |
| "loss": 0.0, |
| "step": 535500 |
| }, |
| { |
| "epoch": 0.8556470711248646, |
| "grad_norm": 0.0002756445901468396, |
| "learning_rate": 1.8382637666153842e-05, |
| "loss": 0.0, |
| "step": 536000 |
| }, |
| { |
| "epoch": 0.8564452493628543, |
| "grad_norm": 0.00025066762464120984, |
| "learning_rate": 1.8364305487958422e-05, |
| "loss": 0.0, |
| "step": 536500 |
| }, |
| { |
| "epoch": 0.8572434276008438, |
| "grad_norm": 0.00021802319679409266, |
| "learning_rate": 1.8345968017001875e-05, |
| "loss": 0.0, |
| "step": 537000 |
| }, |
| { |
| "epoch": 0.8580416058388335, |
| "grad_norm": 0.0005224825581535697, |
| "learning_rate": 1.8327625282132908e-05, |
| "loss": 0.0, |
| "step": 537500 |
| }, |
| { |
| "epoch": 0.858839784076823, |
| "grad_norm": 0.00023706798674538732, |
| "learning_rate": 1.8309277312208506e-05, |
| "loss": 0.0, |
| "step": 538000 |
| }, |
| { |
| "epoch": 0.8596379623148127, |
| "grad_norm": 0.003222405444830656, |
| "learning_rate": 1.8290924136093882e-05, |
| "loss": 0.0, |
| "step": 538500 |
| }, |
| { |
| "epoch": 0.8604361405528023, |
| "grad_norm": 0.002675387542694807, |
| "learning_rate": 1.8272565782662458e-05, |
| "loss": 0.0, |
| "step": 539000 |
| }, |
| { |
| "epoch": 0.8612343187907919, |
| "grad_norm": 0.00026413999148644507, |
| "learning_rate": 1.8254202280795784e-05, |
| "loss": 0.0, |
| "step": 539500 |
| }, |
| { |
| "epoch": 0.8620324970287815, |
| "grad_norm": 0.0013028520625084639, |
| "learning_rate": 1.823583365938352e-05, |
| "loss": 0.0, |
| "step": 540000 |
| }, |
| { |
| "epoch": 0.8628306752667712, |
| "grad_norm": 0.00041675748070701957, |
| "learning_rate": 1.8217459947323374e-05, |
| "loss": 0.0, |
| "step": 540500 |
| }, |
| { |
| "epoch": 0.8636288535047607, |
| "grad_norm": 0.0011459665838629007, |
| "learning_rate": 1.819908117352107e-05, |
| "loss": 0.0, |
| "step": 541000 |
| }, |
| { |
| "epoch": 0.8644270317427504, |
| "grad_norm": 0.0003427125629968941, |
| "learning_rate": 1.818069736689028e-05, |
| "loss": 0.0, |
| "step": 541500 |
| }, |
| { |
| "epoch": 0.8652252099807399, |
| "grad_norm": 0.00021466145699378103, |
| "learning_rate": 1.816230855635261e-05, |
| "loss": 0.0, |
| "step": 542000 |
| }, |
| { |
| "epoch": 0.8660233882187296, |
| "grad_norm": 0.00012651206634473056, |
| "learning_rate": 1.8143914770837535e-05, |
| "loss": 0.0, |
| "step": 542500 |
| }, |
| { |
| "epoch": 0.8668215664567192, |
| "grad_norm": 0.0002110886125592515, |
| "learning_rate": 1.8125516039282347e-05, |
| "loss": 0.0, |
| "step": 543000 |
| }, |
| { |
| "epoch": 0.8676197446947088, |
| "grad_norm": 0.0003328848397359252, |
| "learning_rate": 1.8107112390632135e-05, |
| "loss": 0.0, |
| "step": 543500 |
| }, |
| { |
| "epoch": 0.8684179229326984, |
| "grad_norm": 0.005305567290633917, |
| "learning_rate": 1.8088703853839707e-05, |
| "loss": 0.0, |
| "step": 544000 |
| }, |
| { |
| "epoch": 0.869216101170688, |
| "grad_norm": 0.0003353380016051233, |
| "learning_rate": 1.8070290457865575e-05, |
| "loss": 0.0, |
| "step": 544500 |
| }, |
| { |
| "epoch": 0.8700142794086776, |
| "grad_norm": 0.11375279724597931, |
| "learning_rate": 1.8051872231677876e-05, |
| "loss": 0.0, |
| "step": 545000 |
| }, |
| { |
| "epoch": 0.8708124576466673, |
| "grad_norm": 0.00021715887123718858, |
| "learning_rate": 1.8033449204252376e-05, |
| "loss": 0.0, |
| "step": 545500 |
| }, |
| { |
| "epoch": 0.8716106358846568, |
| "grad_norm": 0.0002547954791225493, |
| "learning_rate": 1.801502140457236e-05, |
| "loss": 0.0, |
| "step": 546000 |
| }, |
| { |
| "epoch": 0.8724088141226465, |
| "grad_norm": 0.0003588471154216677, |
| "learning_rate": 1.7996588861628653e-05, |
| "loss": 0.0, |
| "step": 546500 |
| }, |
| { |
| "epoch": 0.873206992360636, |
| "grad_norm": 0.0005377253983169794, |
| "learning_rate": 1.797815160441952e-05, |
| "loss": 0.0, |
| "step": 547000 |
| }, |
| { |
| "epoch": 0.8740051705986257, |
| "grad_norm": 4.066957473754883, |
| "learning_rate": 1.7959709661950656e-05, |
| "loss": 0.0, |
| "step": 547500 |
| }, |
| { |
| "epoch": 0.8748033488366153, |
| "grad_norm": 0.0021291342563927174, |
| "learning_rate": 1.7941263063235112e-05, |
| "loss": 0.0, |
| "step": 548000 |
| }, |
| { |
| "epoch": 0.8756015270746049, |
| "grad_norm": 0.005908517632633448, |
| "learning_rate": 1.792281183729328e-05, |
| "loss": 0.0, |
| "step": 548500 |
| }, |
| { |
| "epoch": 0.8763997053125945, |
| "grad_norm": 0.00016735069220885634, |
| "learning_rate": 1.790435601315282e-05, |
| "loss": 0.0, |
| "step": 549000 |
| }, |
| { |
| "epoch": 0.8771978835505841, |
| "grad_norm": 0.11726677417755127, |
| "learning_rate": 1.7885895619848632e-05, |
| "loss": 0.0, |
| "step": 549500 |
| }, |
| { |
| "epoch": 0.8779960617885738, |
| "grad_norm": 0.0001299067516811192, |
| "learning_rate": 1.7867430686422805e-05, |
| "loss": 0.0, |
| "step": 550000 |
| }, |
| { |
| "epoch": 0.8787942400265634, |
| "grad_norm": 0.0008645313209854066, |
| "learning_rate": 1.7848961241924568e-05, |
| "loss": 0.0, |
| "step": 550500 |
| }, |
| { |
| "epoch": 0.879592418264553, |
| "grad_norm": 0.001213490148074925, |
| "learning_rate": 1.7830487315410244e-05, |
| "loss": 0.0, |
| "step": 551000 |
| }, |
| { |
| "epoch": 0.8803905965025426, |
| "grad_norm": 0.0003075381100643426, |
| "learning_rate": 1.7812008935943214e-05, |
| "loss": 0.0, |
| "step": 551500 |
| }, |
| { |
| "epoch": 0.8811887747405323, |
| "grad_norm": 0.00016041690832935274, |
| "learning_rate": 1.779352613259386e-05, |
| "loss": 0.0, |
| "step": 552000 |
| }, |
| { |
| "epoch": 0.8819869529785218, |
| "grad_norm": 0.00022118906781543046, |
| "learning_rate": 1.777503893443952e-05, |
| "loss": 0.0, |
| "step": 552500 |
| }, |
| { |
| "epoch": 0.8827851312165115, |
| "grad_norm": 0.0002528753539081663, |
| "learning_rate": 1.7756547370564453e-05, |
| "loss": 0.0, |
| "step": 553000 |
| }, |
| { |
| "epoch": 0.883583309454501, |
| "grad_norm": 0.00020087572920601815, |
| "learning_rate": 1.7738051470059794e-05, |
| "loss": 0.0, |
| "step": 553500 |
| }, |
| { |
| "epoch": 0.8843814876924907, |
| "grad_norm": 0.0002061546838376671, |
| "learning_rate": 1.7719551262023474e-05, |
| "loss": 0.0, |
| "step": 554000 |
| }, |
| { |
| "epoch": 0.8851796659304803, |
| "grad_norm": 0.004588930867612362, |
| "learning_rate": 1.7701046775560224e-05, |
| "loss": 0.0, |
| "step": 554500 |
| }, |
| { |
| "epoch": 0.8859778441684699, |
| "grad_norm": 0.00019890641851816326, |
| "learning_rate": 1.76825380397815e-05, |
| "loss": 0.0, |
| "step": 555000 |
| }, |
| { |
| "epoch": 0.8867760224064595, |
| "grad_norm": 0.0015211553545668721, |
| "learning_rate": 1.766402508380544e-05, |
| "loss": 0.0, |
| "step": 555500 |
| }, |
| { |
| "epoch": 0.8875742006444491, |
| "grad_norm": 0.00012981586041860282, |
| "learning_rate": 1.7645507936756825e-05, |
| "loss": 0.0, |
| "step": 556000 |
| }, |
| { |
| "epoch": 0.8883723788824387, |
| "grad_norm": 0.0001142864057328552, |
| "learning_rate": 1.7626986627767025e-05, |
| "loss": 0.0, |
| "step": 556500 |
| }, |
| { |
| "epoch": 0.8891705571204284, |
| "grad_norm": 0.00013470168050844222, |
| "learning_rate": 1.760846118597396e-05, |
| "loss": 0.0, |
| "step": 557000 |
| }, |
| { |
| "epoch": 0.8899687353584179, |
| "grad_norm": 0.00016007563681341708, |
| "learning_rate": 1.7589931640522053e-05, |
| "loss": 0.0, |
| "step": 557500 |
| }, |
| { |
| "epoch": 0.8907669135964076, |
| "grad_norm": 0.00014388897398021072, |
| "learning_rate": 1.757139802056218e-05, |
| "loss": 0.0, |
| "step": 558000 |
| }, |
| { |
| "epoch": 0.8915650918343971, |
| "grad_norm": 0.00018686757539398968, |
| "learning_rate": 1.7552860355251632e-05, |
| "loss": 0.0, |
| "step": 558500 |
| }, |
| { |
| "epoch": 0.8923632700723868, |
| "grad_norm": 0.002567690797150135, |
| "learning_rate": 1.7534318673754057e-05, |
| "loss": 0.0, |
| "step": 559000 |
| }, |
| { |
| "epoch": 0.8931614483103764, |
| "grad_norm": 0.00014588158228434622, |
| "learning_rate": 1.751577300523943e-05, |
| "loss": 0.0, |
| "step": 559500 |
| }, |
| { |
| "epoch": 0.893959626548366, |
| "grad_norm": 0.00012244051322340965, |
| "learning_rate": 1.749722337888399e-05, |
| "loss": 0.0, |
| "step": 560000 |
| }, |
| { |
| "epoch": 0.8947578047863556, |
| "grad_norm": 0.0001751197996782139, |
| "learning_rate": 1.7478669823870202e-05, |
| "loss": 0.0, |
| "step": 560500 |
| }, |
| { |
| "epoch": 0.8955559830243452, |
| "grad_norm": 0.0001905184908537194, |
| "learning_rate": 1.7460112369386723e-05, |
| "loss": 0.0, |
| "step": 561000 |
| }, |
| { |
| "epoch": 0.8963541612623348, |
| "grad_norm": 0.00021194595319684595, |
| "learning_rate": 1.7441551044628338e-05, |
| "loss": 0.0, |
| "step": 561500 |
| }, |
| { |
| "epoch": 0.8971523395003245, |
| "grad_norm": 0.00022341775184031576, |
| "learning_rate": 1.742298587879592e-05, |
| "loss": 0.0, |
| "step": 562000 |
| }, |
| { |
| "epoch": 0.897950517738314, |
| "grad_norm": 0.00014410440053325146, |
| "learning_rate": 1.7404416901096373e-05, |
| "loss": 0.0, |
| "step": 562500 |
| }, |
| { |
| "epoch": 0.8987486959763037, |
| "grad_norm": 8.38216656120494e-05, |
| "learning_rate": 1.738584414074263e-05, |
| "loss": 0.0, |
| "step": 563000 |
| }, |
| { |
| "epoch": 0.8995468742142932, |
| "grad_norm": 0.0003483085019979626, |
| "learning_rate": 1.736726762695354e-05, |
| "loss": 0.0, |
| "step": 563500 |
| }, |
| { |
| "epoch": 0.9003450524522829, |
| "grad_norm": 0.006742103956639767, |
| "learning_rate": 1.7348687388953877e-05, |
| "loss": 0.0, |
| "step": 564000 |
| }, |
| { |
| "epoch": 0.9011432306902725, |
| "grad_norm": 0.00015459113637916744, |
| "learning_rate": 1.7330103455974265e-05, |
| "loss": 0.0, |
| "step": 564500 |
| }, |
| { |
| "epoch": 0.9019414089282621, |
| "grad_norm": 0.00011757721949834377, |
| "learning_rate": 1.7311515857251152e-05, |
| "loss": 0.0, |
| "step": 565000 |
| }, |
| { |
| "epoch": 0.9027395871662518, |
| "grad_norm": 0.0001631448103580624, |
| "learning_rate": 1.7292924622026736e-05, |
| "loss": 0.0, |
| "step": 565500 |
| }, |
| { |
| "epoch": 0.9035377654042414, |
| "grad_norm": 0.000231702157179825, |
| "learning_rate": 1.727432977954896e-05, |
| "loss": 0.0, |
| "step": 566000 |
| }, |
| { |
| "epoch": 0.904335943642231, |
| "grad_norm": 0.00016405931091867387, |
| "learning_rate": 1.725573135907141e-05, |
| "loss": 0.0, |
| "step": 566500 |
| }, |
| { |
| "epoch": 0.9051341218802206, |
| "grad_norm": 0.0002725492522586137, |
| "learning_rate": 1.7237129389853332e-05, |
| "loss": 0.0, |
| "step": 567000 |
| }, |
| { |
| "epoch": 0.9059323001182102, |
| "grad_norm": 0.00022207711299415678, |
| "learning_rate": 1.7218523901159536e-05, |
| "loss": 0.0, |
| "step": 567500 |
| }, |
| { |
| "epoch": 0.9067304783561998, |
| "grad_norm": 0.010523856617510319, |
| "learning_rate": 1.7199914922260375e-05, |
| "loss": 0.0, |
| "step": 568000 |
| }, |
| { |
| "epoch": 0.9075286565941895, |
| "grad_norm": 0.00027734291506931186, |
| "learning_rate": 1.7181302482431694e-05, |
| "loss": 0.0, |
| "step": 568500 |
| }, |
| { |
| "epoch": 0.908326834832179, |
| "grad_norm": 0.00016665668226778507, |
| "learning_rate": 1.7162686610954778e-05, |
| "loss": 0.0, |
| "step": 569000 |
| }, |
| { |
| "epoch": 0.9091250130701687, |
| "grad_norm": 0.00012514390982687473, |
| "learning_rate": 1.714406733711632e-05, |
| "loss": 0.0, |
| "step": 569500 |
| }, |
| { |
| "epoch": 0.9099231913081582, |
| "grad_norm": 0.0009053830290213227, |
| "learning_rate": 1.7125444690208352e-05, |
| "loss": 0.0001, |
| "step": 570000 |
| }, |
| { |
| "epoch": 0.9107213695461479, |
| "grad_norm": 0.000333428499288857, |
| "learning_rate": 1.710681869952822e-05, |
| "loss": 0.0, |
| "step": 570500 |
| }, |
| { |
| "epoch": 0.9115195477841375, |
| "grad_norm": 0.0003971235710196197, |
| "learning_rate": 1.7088189394378537e-05, |
| "loss": 0.0, |
| "step": 571000 |
| }, |
| { |
| "epoch": 0.9123177260221271, |
| "grad_norm": 0.0004157690273132175, |
| "learning_rate": 1.706955680406712e-05, |
| "loss": 0.0, |
| "step": 571500 |
| }, |
| { |
| "epoch": 0.9131159042601167, |
| "grad_norm": 0.00027369658346287906, |
| "learning_rate": 1.7050920957906956e-05, |
| "loss": 0.0, |
| "step": 572000 |
| }, |
| { |
| "epoch": 0.9139140824981064, |
| "grad_norm": 4293.71044921875, |
| "learning_rate": 1.703228188521616e-05, |
| "loss": 0.0, |
| "step": 572500 |
| }, |
| { |
| "epoch": 0.9147122607360959, |
| "grad_norm": 0.0003048023791052401, |
| "learning_rate": 1.7013639615317912e-05, |
| "loss": 0.0, |
| "step": 573000 |
| }, |
| { |
| "epoch": 0.9155104389740856, |
| "grad_norm": 312.3310546875, |
| "learning_rate": 1.699499417754044e-05, |
| "loss": 0.0, |
| "step": 573500 |
| }, |
| { |
| "epoch": 0.9163086172120751, |
| "grad_norm": 0.0006332839257083833, |
| "learning_rate": 1.6976345601216934e-05, |
| "loss": 0.0, |
| "step": 574000 |
| }, |
| { |
| "epoch": 0.9171067954500648, |
| "grad_norm": 0.0004323949106037617, |
| "learning_rate": 1.695769391568554e-05, |
| "loss": 0.0, |
| "step": 574500 |
| }, |
| { |
| "epoch": 0.9179049736880543, |
| "grad_norm": 0.0002921383420471102, |
| "learning_rate": 1.6939039150289284e-05, |
| "loss": 0.0, |
| "step": 575000 |
| }, |
| { |
| "epoch": 0.918703151926044, |
| "grad_norm": 0.00022676597291138023, |
| "learning_rate": 1.692038133437604e-05, |
| "loss": 0.0, |
| "step": 575500 |
| }, |
| { |
| "epoch": 0.9195013301640336, |
| "grad_norm": 0.00021034496603533626, |
| "learning_rate": 1.690172049729849e-05, |
| "loss": 0.0, |
| "step": 576000 |
| }, |
| { |
| "epoch": 0.9202995084020232, |
| "grad_norm": 0.00011297802848275751, |
| "learning_rate": 1.6883056668414055e-05, |
| "loss": 0.0, |
| "step": 576500 |
| }, |
| { |
| "epoch": 0.9210976866400128, |
| "grad_norm": 0.0002499468100722879, |
| "learning_rate": 1.6864389877084864e-05, |
| "loss": 0.0, |
| "step": 577000 |
| }, |
| { |
| "epoch": 0.9218958648780025, |
| "grad_norm": 0.0009551959810778499, |
| "learning_rate": 1.6845720152677714e-05, |
| "loss": 0.0, |
| "step": 577500 |
| }, |
| { |
| "epoch": 0.922694043115992, |
| "grad_norm": 0.00026030378649011254, |
| "learning_rate": 1.6827047524564023e-05, |
| "loss": 0.0, |
| "step": 578000 |
| }, |
| { |
| "epoch": 0.9234922213539817, |
| "grad_norm": 0.0013032422866672277, |
| "learning_rate": 1.6808372022119757e-05, |
| "loss": 0.0, |
| "step": 578500 |
| }, |
| { |
| "epoch": 0.9242903995919712, |
| "grad_norm": 0.04895612224936485, |
| "learning_rate": 1.6789693674725426e-05, |
| "loss": 0.0, |
| "step": 579000 |
| }, |
| { |
| "epoch": 0.9250885778299609, |
| "grad_norm": 0.0001584753772476688, |
| "learning_rate": 1.677101251176599e-05, |
| "loss": 0.0, |
| "step": 579500 |
| }, |
| { |
| "epoch": 0.9258867560679505, |
| "grad_norm": 0.00018785694555845112, |
| "learning_rate": 1.6752328562630863e-05, |
| "loss": 0.0, |
| "step": 580000 |
| }, |
| { |
| "epoch": 0.9266849343059401, |
| "grad_norm": 0.00013747978664468974, |
| "learning_rate": 1.673364185671383e-05, |
| "loss": 0.0, |
| "step": 580500 |
| }, |
| { |
| "epoch": 0.9274831125439298, |
| "grad_norm": 0.0001757456484483555, |
| "learning_rate": 1.671495242341301e-05, |
| "loss": 0.0, |
| "step": 581000 |
| }, |
| { |
| "epoch": 0.9282812907819193, |
| "grad_norm": 0.00012764699931722134, |
| "learning_rate": 1.6696260292130827e-05, |
| "loss": 0.0, |
| "step": 581500 |
| }, |
| { |
| "epoch": 0.929079469019909, |
| "grad_norm": 0.00016263512952718884, |
| "learning_rate": 1.6677565492273935e-05, |
| "loss": 0.0, |
| "step": 582000 |
| }, |
| { |
| "epoch": 0.9298776472578986, |
| "grad_norm": 0.00020701033645309508, |
| "learning_rate": 1.66588680532532e-05, |
| "loss": 0.0, |
| "step": 582500 |
| }, |
| { |
| "epoch": 0.9306758254958882, |
| "grad_norm": 0.0002117520198225975, |
| "learning_rate": 1.6640168004483616e-05, |
| "loss": 0.0, |
| "step": 583000 |
| }, |
| { |
| "epoch": 0.9314740037338778, |
| "grad_norm": 0.00017498256056569517, |
| "learning_rate": 1.662146537538431e-05, |
| "loss": 0.0, |
| "step": 583500 |
| }, |
| { |
| "epoch": 0.9322721819718675, |
| "grad_norm": 0.00016180331294890493, |
| "learning_rate": 1.660276019537845e-05, |
| "loss": 0.0, |
| "step": 584000 |
| }, |
| { |
| "epoch": 0.933070360209857, |
| "grad_norm": 0.00016664137365296483, |
| "learning_rate": 1.658405249389323e-05, |
| "loss": 0.0, |
| "step": 584500 |
| }, |
| { |
| "epoch": 0.9338685384478467, |
| "grad_norm": 0.0004110218142159283, |
| "learning_rate": 1.65653423003598e-05, |
| "loss": 0.0, |
| "step": 585000 |
| }, |
| { |
| "epoch": 0.9346667166858362, |
| "grad_norm": 0.00035451291478239, |
| "learning_rate": 1.6546629644213244e-05, |
| "loss": 0.0, |
| "step": 585500 |
| }, |
| { |
| "epoch": 0.9354648949238259, |
| "grad_norm": 0.00023166697064880282, |
| "learning_rate": 1.6527914554892503e-05, |
| "loss": 0.0, |
| "step": 586000 |
| }, |
| { |
| "epoch": 0.9362630731618155, |
| "grad_norm": 0.00042415011557750404, |
| "learning_rate": 1.650919706184035e-05, |
| "loss": 0.0, |
| "step": 586500 |
| }, |
| { |
| "epoch": 0.9370612513998051, |
| "grad_norm": 0.000651057343930006, |
| "learning_rate": 1.6490477194503354e-05, |
| "loss": 0.0, |
| "step": 587000 |
| }, |
| { |
| "epoch": 0.9378594296377947, |
| "grad_norm": 0.0006024042959325016, |
| "learning_rate": 1.6471754982331805e-05, |
| "loss": 0.0, |
| "step": 587500 |
| }, |
| { |
| "epoch": 0.9386576078757843, |
| "grad_norm": 0.00014395530160982162, |
| "learning_rate": 1.645303045477969e-05, |
| "loss": 0.0, |
| "step": 588000 |
| }, |
| { |
| "epoch": 0.9394557861137739, |
| "grad_norm": 0.00016363435133825988, |
| "learning_rate": 1.6434303641304624e-05, |
| "loss": 0.0, |
| "step": 588500 |
| }, |
| { |
| "epoch": 0.9402539643517636, |
| "grad_norm": 0.00018911808729171753, |
| "learning_rate": 1.6415574571367838e-05, |
| "loss": 0.0, |
| "step": 589000 |
| }, |
| { |
| "epoch": 0.9410521425897531, |
| "grad_norm": 0.00025503954384475946, |
| "learning_rate": 1.6396843274434104e-05, |
| "loss": 0.0, |
| "step": 589500 |
| }, |
| { |
| "epoch": 0.9418503208277428, |
| "grad_norm": 0.000276244361884892, |
| "learning_rate": 1.6378109779971688e-05, |
| "loss": 0.0, |
| "step": 590000 |
| }, |
| { |
| "epoch": 0.9426484990657323, |
| "grad_norm": 0.00021976447897031903, |
| "learning_rate": 1.6359374117452336e-05, |
| "loss": 0.0, |
| "step": 590500 |
| }, |
| { |
| "epoch": 0.943446677303722, |
| "grad_norm": 0.00018629759142640978, |
| "learning_rate": 1.634063631635118e-05, |
| "loss": 0.0, |
| "step": 591000 |
| }, |
| { |
| "epoch": 0.9442448555417116, |
| "grad_norm": 0.00018371363694313914, |
| "learning_rate": 1.6321896406146738e-05, |
| "loss": 0.0, |
| "step": 591500 |
| }, |
| { |
| "epoch": 0.9450430337797012, |
| "grad_norm": 0.0001642345596337691, |
| "learning_rate": 1.6303154416320825e-05, |
| "loss": 0.0, |
| "step": 592000 |
| }, |
| { |
| "epoch": 0.9458412120176908, |
| "grad_norm": 0.0002542451547924429, |
| "learning_rate": 1.6284410376358545e-05, |
| "loss": 0.0, |
| "step": 592500 |
| }, |
| { |
| "epoch": 0.9466393902556804, |
| "grad_norm": 0.0003169816918671131, |
| "learning_rate": 1.6265664315748215e-05, |
| "loss": 0.0, |
| "step": 593000 |
| }, |
| { |
| "epoch": 0.94743756849367, |
| "grad_norm": 147.49600219726562, |
| "learning_rate": 1.6246916263981344e-05, |
| "loss": 0.0, |
| "step": 593500 |
| }, |
| { |
| "epoch": 0.9482357467316597, |
| "grad_norm": 0.0002796686312649399, |
| "learning_rate": 1.6228166250552565e-05, |
| "loss": 0.0, |
| "step": 594000 |
| }, |
| { |
| "epoch": 0.9490339249696492, |
| "grad_norm": 0.00013148550351615995, |
| "learning_rate": 1.620941430495959e-05, |
| "loss": 0.0, |
| "step": 594500 |
| }, |
| { |
| "epoch": 0.9498321032076389, |
| "grad_norm": 0.0008374619064852595, |
| "learning_rate": 1.6190660456703192e-05, |
| "loss": 0.0, |
| "step": 595000 |
| }, |
| { |
| "epoch": 0.9506302814456284, |
| "grad_norm": 0.00010609177843434736, |
| "learning_rate": 1.6171904735287114e-05, |
| "loss": 0.0, |
| "step": 595500 |
| }, |
| { |
| "epoch": 0.9514284596836181, |
| "grad_norm": 0.00019812423852272332, |
| "learning_rate": 1.6153147170218062e-05, |
| "loss": 0.0, |
| "step": 596000 |
| }, |
| { |
| "epoch": 0.9522266379216078, |
| "grad_norm": 0.000553951773326844, |
| "learning_rate": 1.6134387791005628e-05, |
| "loss": 0.0, |
| "step": 596500 |
| }, |
| { |
| "epoch": 0.9530248161595973, |
| "grad_norm": 0.00015377177624031901, |
| "learning_rate": 1.611562662716228e-05, |
| "loss": 0.0, |
| "step": 597000 |
| }, |
| { |
| "epoch": 0.953822994397587, |
| "grad_norm": 0.00025437449221499264, |
| "learning_rate": 1.609686370820327e-05, |
| "loss": 0.0, |
| "step": 597500 |
| }, |
| { |
| "epoch": 0.9546211726355766, |
| "grad_norm": 0.00011710778926499188, |
| "learning_rate": 1.607809906364662e-05, |
| "loss": 0.0, |
| "step": 598000 |
| }, |
| { |
| "epoch": 0.9554193508735662, |
| "grad_norm": 0.00018913969688583165, |
| "learning_rate": 1.6059332723013078e-05, |
| "loss": 0.0, |
| "step": 598500 |
| }, |
| { |
| "epoch": 0.9562175291115558, |
| "grad_norm": 0.07491962611675262, |
| "learning_rate": 1.6040564715826045e-05, |
| "loss": 0.0, |
| "step": 599000 |
| }, |
| { |
| "epoch": 0.9570157073495454, |
| "grad_norm": 0.00014903175178915262, |
| "learning_rate": 1.6021795071611546e-05, |
| "loss": 0.0, |
| "step": 599500 |
| }, |
| { |
| "epoch": 0.957813885587535, |
| "grad_norm": 0.0003374506486579776, |
| "learning_rate": 1.6003023819898188e-05, |
| "loss": 0.0, |
| "step": 600000 |
| }, |
| { |
| "epoch": 0.957813885587535, |
| "eval_loss": 1.3122987184033263e-05, |
| "eval_runtime": 22158.8784, |
| "eval_samples_per_second": 100.515, |
| "eval_steps_per_second": 3.141, |
| "step": 600000 |
| }, |
| { |
| "epoch": 0.9586120638255247, |
| "grad_norm": 0.002838114043697715, |
| "learning_rate": 1.5984250990217106e-05, |
| "loss": 0.0, |
| "step": 600500 |
| }, |
| { |
| "epoch": 0.9594102420635142, |
| "grad_norm": 4460.6826171875, |
| "learning_rate": 1.596547661210191e-05, |
| "loss": 0.0, |
| "step": 601000 |
| }, |
| { |
| "epoch": 0.9602084203015039, |
| "grad_norm": 0.00011342266952851787, |
| "learning_rate": 1.594670071508865e-05, |
| "loss": 0.0, |
| "step": 601500 |
| }, |
| { |
| "epoch": 0.9610065985394934, |
| "grad_norm": 0.00016833537665661424, |
| "learning_rate": 1.592792332871578e-05, |
| "loss": 0.0, |
| "step": 602000 |
| }, |
| { |
| "epoch": 0.9618047767774831, |
| "grad_norm": 0.0022102862130850554, |
| "learning_rate": 1.5909144482524065e-05, |
| "loss": 0.0, |
| "step": 602500 |
| }, |
| { |
| "epoch": 0.9626029550154727, |
| "grad_norm": 0.00012462472659535706, |
| "learning_rate": 1.5890364206056598e-05, |
| "loss": 0.0, |
| "step": 603000 |
| }, |
| { |
| "epoch": 0.9634011332534623, |
| "grad_norm": 0.00019439893367234617, |
| "learning_rate": 1.58715825288587e-05, |
| "loss": 0.0, |
| "step": 603500 |
| }, |
| { |
| "epoch": 0.9641993114914519, |
| "grad_norm": 0.00014579604612663388, |
| "learning_rate": 1.5852799480477917e-05, |
| "loss": 0.0, |
| "step": 604000 |
| }, |
| { |
| "epoch": 0.9649974897294415, |
| "grad_norm": 0.0004376780125312507, |
| "learning_rate": 1.5834015090463934e-05, |
| "loss": 0.0, |
| "step": 604500 |
| }, |
| { |
| "epoch": 0.9657956679674311, |
| "grad_norm": 0.013489479199051857, |
| "learning_rate": 1.5815229388368547e-05, |
| "loss": 0.0, |
| "step": 605000 |
| }, |
| { |
| "epoch": 0.9665938462054208, |
| "grad_norm": 0.0001327054196735844, |
| "learning_rate": 1.579644240374563e-05, |
| "loss": 0.0, |
| "step": 605500 |
| }, |
| { |
| "epoch": 0.9673920244434103, |
| "grad_norm": 0.0001604136050445959, |
| "learning_rate": 1.5777654166151063e-05, |
| "loss": 0.0, |
| "step": 606000 |
| }, |
| { |
| "epoch": 0.9681902026814, |
| "grad_norm": 0.0035548266023397446, |
| "learning_rate": 1.5758864705142705e-05, |
| "loss": 0.0, |
| "step": 606500 |
| }, |
| { |
| "epoch": 0.9689883809193895, |
| "grad_norm": 0.00016837022849358618, |
| "learning_rate": 1.574007405028033e-05, |
| "loss": 0.0, |
| "step": 607000 |
| }, |
| { |
| "epoch": 0.9697865591573792, |
| "grad_norm": 0.0028717576060444117, |
| "learning_rate": 1.5721282231125607e-05, |
| "loss": 0.0, |
| "step": 607500 |
| }, |
| { |
| "epoch": 0.9705847373953688, |
| "grad_norm": 0.00012833454820793122, |
| "learning_rate": 1.570248927724201e-05, |
| "loss": 0.0, |
| "step": 608000 |
| }, |
| { |
| "epoch": 0.9713829156333584, |
| "grad_norm": 0.0005946651217527688, |
| "learning_rate": 1.5683695218194816e-05, |
| "loss": 0.0, |
| "step": 608500 |
| }, |
| { |
| "epoch": 0.972181093871348, |
| "grad_norm": 0.00016886359662748873, |
| "learning_rate": 1.5664900083551052e-05, |
| "loss": 0.0, |
| "step": 609000 |
| }, |
| { |
| "epoch": 0.9729792721093377, |
| "grad_norm": 0.0061637298204004765, |
| "learning_rate": 1.564610390287941e-05, |
| "loss": 0.0, |
| "step": 609500 |
| }, |
| { |
| "epoch": 0.9737774503473272, |
| "grad_norm": 7.434777944581583e-05, |
| "learning_rate": 1.5627306705750245e-05, |
| "loss": 0.0, |
| "step": 610000 |
| }, |
| { |
| "epoch": 0.9745756285853169, |
| "grad_norm": 31.525394439697266, |
| "learning_rate": 1.5608508521735514e-05, |
| "loss": 0.0, |
| "step": 610500 |
| }, |
| { |
| "epoch": 0.9753738068233064, |
| "grad_norm": 0.0001488685084041208, |
| "learning_rate": 1.558970938040871e-05, |
| "loss": 0.0, |
| "step": 611000 |
| }, |
| { |
| "epoch": 0.9761719850612961, |
| "grad_norm": 0.00011100114352302626, |
| "learning_rate": 1.557090931134484e-05, |
| "loss": 0.0, |
| "step": 611500 |
| }, |
| { |
| "epoch": 0.9769701632992858, |
| "grad_norm": 0.00014055597421247512, |
| "learning_rate": 1.5552108344120384e-05, |
| "loss": 0.0, |
| "step": 612000 |
| }, |
| { |
| "epoch": 0.9777683415372753, |
| "grad_norm": 0.0002197181456722319, |
| "learning_rate": 1.5533306508313215e-05, |
| "loss": 0.0, |
| "step": 612500 |
| }, |
| { |
| "epoch": 0.978566519775265, |
| "grad_norm": 0.00010709422349464148, |
| "learning_rate": 1.5514503833502582e-05, |
| "loss": 0.0, |
| "step": 613000 |
| }, |
| { |
| "epoch": 0.9793646980132545, |
| "grad_norm": 7.577735232189298e-05, |
| "learning_rate": 1.5495700349269053e-05, |
| "loss": 0.0, |
| "step": 613500 |
| }, |
| { |
| "epoch": 0.9801628762512442, |
| "grad_norm": 0.00015610417176503688, |
| "learning_rate": 1.5476896085194475e-05, |
| "loss": 0.0, |
| "step": 614000 |
| }, |
| { |
| "epoch": 0.9809610544892338, |
| "grad_norm": 0.012848759070038795, |
| "learning_rate": 1.5458091070861908e-05, |
| "loss": 0.0, |
| "step": 614500 |
| }, |
| { |
| "epoch": 0.9817592327272234, |
| "grad_norm": 0.00010452749847900122, |
| "learning_rate": 1.54392853358556e-05, |
| "loss": 0.0, |
| "step": 615000 |
| }, |
| { |
| "epoch": 0.982557410965213, |
| "grad_norm": 0.00010649808245943859, |
| "learning_rate": 1.542047890976094e-05, |
| "loss": 0.0, |
| "step": 615500 |
| }, |
| { |
| "epoch": 0.9833555892032027, |
| "grad_norm": 0.00017509504687041044, |
| "learning_rate": 1.5401671822164384e-05, |
| "loss": 0.0, |
| "step": 616000 |
| }, |
| { |
| "epoch": 0.9841537674411922, |
| "grad_norm": 0.00021298132196534425, |
| "learning_rate": 1.538286410265346e-05, |
| "loss": 0.0, |
| "step": 616500 |
| }, |
| { |
| "epoch": 0.9849519456791819, |
| "grad_norm": 0.0001403950882377103, |
| "learning_rate": 1.5364055780816666e-05, |
| "loss": 0.0, |
| "step": 617000 |
| }, |
| { |
| "epoch": 0.9857501239171714, |
| "grad_norm": 0.00020762416534125805, |
| "learning_rate": 1.5345246886243443e-05, |
| "loss": 0.0, |
| "step": 617500 |
| }, |
| { |
| "epoch": 0.9865483021551611, |
| "grad_norm": 0.0014126452151685953, |
| "learning_rate": 1.5326437448524157e-05, |
| "loss": 0.0, |
| "step": 618000 |
| }, |
| { |
| "epoch": 0.9873464803931506, |
| "grad_norm": 0.00011496982915559784, |
| "learning_rate": 1.5307627497250003e-05, |
| "loss": 0.0, |
| "step": 618500 |
| }, |
| { |
| "epoch": 0.9881446586311403, |
| "grad_norm": 0.00021757918875664473, |
| "learning_rate": 1.528881706201301e-05, |
| "loss": 0.0, |
| "step": 619000 |
| }, |
| { |
| "epoch": 0.9889428368691299, |
| "grad_norm": 0.00021418675896711648, |
| "learning_rate": 1.5270006172405944e-05, |
| "loss": 0.0, |
| "step": 619500 |
| }, |
| { |
| "epoch": 0.9897410151071195, |
| "grad_norm": 0.00015302287647500634, |
| "learning_rate": 1.5251194858022304e-05, |
| "loss": 0.0, |
| "step": 620000 |
| }, |
| { |
| "epoch": 0.9905391933451091, |
| "grad_norm": 0.00011390163126634434, |
| "learning_rate": 1.5232383148456244e-05, |
| "loss": 0.0, |
| "step": 620500 |
| }, |
| { |
| "epoch": 0.9913373715830988, |
| "grad_norm": 0.0001891214051283896, |
| "learning_rate": 1.5213571073302543e-05, |
| "loss": 0.0, |
| "step": 621000 |
| }, |
| { |
| "epoch": 0.9921355498210883, |
| "grad_norm": 0.0003676644410006702, |
| "learning_rate": 1.5194758662156562e-05, |
| "loss": 0.0, |
| "step": 621500 |
| }, |
| { |
| "epoch": 0.992933728059078, |
| "grad_norm": 0.0007799739250913262, |
| "learning_rate": 1.5175945944614187e-05, |
| "loss": 0.0, |
| "step": 622000 |
| }, |
| { |
| "epoch": 0.9937319062970675, |
| "grad_norm": 0.00016387697542086244, |
| "learning_rate": 1.5157132950271781e-05, |
| "loss": 0.0, |
| "step": 622500 |
| }, |
| { |
| "epoch": 0.9945300845350572, |
| "grad_norm": 0.00018665984680410475, |
| "learning_rate": 1.5138319708726152e-05, |
| "loss": 0.0, |
| "step": 623000 |
| }, |
| { |
| "epoch": 0.9953282627730468, |
| "grad_norm": 0.00018064910545945168, |
| "learning_rate": 1.5119506249574488e-05, |
| "loss": 0.0, |
| "step": 623500 |
| }, |
| { |
| "epoch": 0.9961264410110364, |
| "grad_norm": 9.348099410999566e-05, |
| "learning_rate": 1.510069260241432e-05, |
| "loss": 0.0, |
| "step": 624000 |
| }, |
| { |
| "epoch": 0.996924619249026, |
| "grad_norm": 9.880396100925282e-05, |
| "learning_rate": 1.508187879684348e-05, |
| "loss": 0.0, |
| "step": 624500 |
| }, |
| { |
| "epoch": 0.9977227974870156, |
| "grad_norm": 0.000131562483147718, |
| "learning_rate": 1.506306486246005e-05, |
| "loss": 0.0, |
| "step": 625000 |
| }, |
| { |
| "epoch": 0.9985209757250052, |
| "grad_norm": 0.0006260851514525712, |
| "learning_rate": 1.5044250828862306e-05, |
| "loss": 0.0, |
| "step": 625500 |
| }, |
| { |
| "epoch": 0.9993191539629949, |
| "grad_norm": 0.00013737600238528103, |
| "learning_rate": 1.502543672564869e-05, |
| "loss": 0.0, |
| "step": 626000 |
| }, |
| { |
| "epoch": 1.0001173322009844, |
| "grad_norm": 0.0001540034863865003, |
| "learning_rate": 1.5006622582417749e-05, |
| "loss": 0.0, |
| "step": 626500 |
| }, |
| { |
| "epoch": 1.000915510438974, |
| "grad_norm": 0.0002153823006665334, |
| "learning_rate": 1.4987808428768086e-05, |
| "loss": 0.0, |
| "step": 627000 |
| }, |
| { |
| "epoch": 1.0017136886769638, |
| "grad_norm": 0.00011496929073473439, |
| "learning_rate": 1.4968994294298335e-05, |
| "loss": 0.0, |
| "step": 627500 |
| }, |
| { |
| "epoch": 1.0025118669149533, |
| "grad_norm": 0.0003018941206391901, |
| "learning_rate": 1.4950180208607091e-05, |
| "loss": 0.0, |
| "step": 628000 |
| }, |
| { |
| "epoch": 1.0033100451529429, |
| "grad_norm": 0.00031068435055203736, |
| "learning_rate": 1.493136620129287e-05, |
| "loss": 0.0, |
| "step": 628500 |
| }, |
| { |
| "epoch": 1.0041082233909326, |
| "grad_norm": 0.54404616355896, |
| "learning_rate": 1.4912552301954071e-05, |
| "loss": 0.0, |
| "step": 629000 |
| }, |
| { |
| "epoch": 1.0049064016289222, |
| "grad_norm": 0.002838683081790805, |
| "learning_rate": 1.4893738540188918e-05, |
| "loss": 0.0, |
| "step": 629500 |
| }, |
| { |
| "epoch": 1.0057045798669118, |
| "grad_norm": 0.00013099922216497362, |
| "learning_rate": 1.4874924945595423e-05, |
| "loss": 0.0, |
| "step": 630000 |
| }, |
| { |
| "epoch": 1.0065027581049013, |
| "grad_norm": 358.38702392578125, |
| "learning_rate": 1.485611154777133e-05, |
| "loss": 0.0, |
| "step": 630500 |
| }, |
| { |
| "epoch": 1.007300936342891, |
| "grad_norm": 0.0003976974403485656, |
| "learning_rate": 1.4837298376314081e-05, |
| "loss": 0.0, |
| "step": 631000 |
| }, |
| { |
| "epoch": 1.0080991145808806, |
| "grad_norm": 0.0004381372418720275, |
| "learning_rate": 1.4818485460820754e-05, |
| "loss": 0.0, |
| "step": 631500 |
| }, |
| { |
| "epoch": 1.0088972928188702, |
| "grad_norm": 0.0007524039829149842, |
| "learning_rate": 1.4799672830888027e-05, |
| "loss": 0.0, |
| "step": 632000 |
| }, |
| { |
| "epoch": 1.0096954710568598, |
| "grad_norm": 0.00017187898629345, |
| "learning_rate": 1.4780860516112132e-05, |
| "loss": 0.0, |
| "step": 632500 |
| }, |
| { |
| "epoch": 1.0104936492948495, |
| "grad_norm": 0.0002448650193400681, |
| "learning_rate": 1.4762048546088797e-05, |
| "loss": 0.0, |
| "step": 633000 |
| }, |
| { |
| "epoch": 1.011291827532839, |
| "grad_norm": 0.0002278346655657515, |
| "learning_rate": 1.474323695041322e-05, |
| "loss": 0.0, |
| "step": 633500 |
| }, |
| { |
| "epoch": 1.0120900057708286, |
| "grad_norm": 0.0001101360612665303, |
| "learning_rate": 1.4724425758679997e-05, |
| "loss": 0.0, |
| "step": 634000 |
| }, |
| { |
| "epoch": 1.0128881840088182, |
| "grad_norm": 0.00014911351900082082, |
| "learning_rate": 1.4705615000483101e-05, |
| "loss": 0.0, |
| "step": 634500 |
| }, |
| { |
| "epoch": 1.013686362246808, |
| "grad_norm": 0.00017947569722309709, |
| "learning_rate": 1.4686804705415812e-05, |
| "loss": 0.0, |
| "step": 635000 |
| }, |
| { |
| "epoch": 1.0144845404847975, |
| "grad_norm": 0.0002458689850755036, |
| "learning_rate": 1.4667994903070683e-05, |
| "loss": 0.0, |
| "step": 635500 |
| }, |
| { |
| "epoch": 1.015282718722787, |
| "grad_norm": 0.0003903007018379867, |
| "learning_rate": 1.4649185623039503e-05, |
| "loss": 0.0, |
| "step": 636000 |
| }, |
| { |
| "epoch": 1.0160808969607766, |
| "grad_norm": 0.00017779128393158317, |
| "learning_rate": 1.4630376894913225e-05, |
| "loss": 0.0, |
| "step": 636500 |
| }, |
| { |
| "epoch": 1.0168790751987664, |
| "grad_norm": 0.00012018175766570494, |
| "learning_rate": 1.4611568748281943e-05, |
| "loss": 0.0, |
| "step": 637000 |
| }, |
| { |
| "epoch": 1.017677253436756, |
| "grad_norm": 0.0001338824804406613, |
| "learning_rate": 1.4592761212734836e-05, |
| "loss": 0.0, |
| "step": 637500 |
| }, |
| { |
| "epoch": 1.0184754316747455, |
| "grad_norm": 0.00010453088179929182, |
| "learning_rate": 1.4573954317860103e-05, |
| "loss": 0.0, |
| "step": 638000 |
| }, |
| { |
| "epoch": 1.019273609912735, |
| "grad_norm": 6.280023808358237e-05, |
| "learning_rate": 1.4555148093244975e-05, |
| "loss": 0.0, |
| "step": 638500 |
| }, |
| { |
| "epoch": 1.0200717881507249, |
| "grad_norm": 0.0002268562384415418, |
| "learning_rate": 1.453634256847558e-05, |
| "loss": 0.0, |
| "step": 639000 |
| }, |
| { |
| "epoch": 1.0208699663887144, |
| "grad_norm": 0.00013508339179679751, |
| "learning_rate": 1.4517537773136987e-05, |
| "loss": 0.0, |
| "step": 639500 |
| }, |
| { |
| "epoch": 1.021668144626704, |
| "grad_norm": 0.00012045284529449418, |
| "learning_rate": 1.449873373681309e-05, |
| "loss": 0.0, |
| "step": 640000 |
| }, |
| { |
| "epoch": 1.0224663228646935, |
| "grad_norm": 9.69572938629426e-05, |
| "learning_rate": 1.4479930489086606e-05, |
| "loss": 0.0, |
| "step": 640500 |
| }, |
| { |
| "epoch": 1.0232645011026833, |
| "grad_norm": 0.00012936844723299146, |
| "learning_rate": 1.4461128059539004e-05, |
| "loss": 0.0, |
| "step": 641000 |
| }, |
| { |
| "epoch": 1.0240626793406729, |
| "grad_norm": 0.00017553639190737158, |
| "learning_rate": 1.4442326477750453e-05, |
| "loss": 0.0, |
| "step": 641500 |
| }, |
| { |
| "epoch": 1.0248608575786624, |
| "grad_norm": 0.0002106795145664364, |
| "learning_rate": 1.4423525773299819e-05, |
| "loss": 0.0, |
| "step": 642000 |
| }, |
| { |
| "epoch": 1.025659035816652, |
| "grad_norm": 0.00018383291899226606, |
| "learning_rate": 1.4404725975764552e-05, |
| "loss": 0.0, |
| "step": 642500 |
| }, |
| { |
| "epoch": 1.0264572140546417, |
| "grad_norm": 0.00012423249427229166, |
| "learning_rate": 1.438592711472071e-05, |
| "loss": 0.0, |
| "step": 643000 |
| }, |
| { |
| "epoch": 1.0272553922926313, |
| "grad_norm": 0.00032292716787196696, |
| "learning_rate": 1.4367129219742846e-05, |
| "loss": 0.0, |
| "step": 643500 |
| }, |
| { |
| "epoch": 1.0280535705306209, |
| "grad_norm": 0.00022273634385783225, |
| "learning_rate": 1.4348332320404026e-05, |
| "loss": 0.0, |
| "step": 644000 |
| }, |
| { |
| "epoch": 1.0288517487686106, |
| "grad_norm": 0.0002325259702047333, |
| "learning_rate": 1.4329536446275714e-05, |
| "loss": 0.0, |
| "step": 644500 |
| }, |
| { |
| "epoch": 1.0296499270066002, |
| "grad_norm": 0.03865138441324234, |
| "learning_rate": 1.4310741626927782e-05, |
| "loss": 0.0, |
| "step": 645000 |
| }, |
| { |
| "epoch": 1.0304481052445897, |
| "grad_norm": 0.0013154677581042051, |
| "learning_rate": 1.4291947891928453e-05, |
| "loss": 0.0, |
| "step": 645500 |
| }, |
| { |
| "epoch": 1.0312462834825793, |
| "grad_norm": 0.00014797232870478183, |
| "learning_rate": 1.427315527084421e-05, |
| "loss": 0.0, |
| "step": 646000 |
| }, |
| { |
| "epoch": 1.032044461720569, |
| "grad_norm": 1216.7861328125, |
| "learning_rate": 1.4254363793239825e-05, |
| "loss": 0.0, |
| "step": 646500 |
| }, |
| { |
| "epoch": 1.0328426399585586, |
| "grad_norm": 7.254226511577144e-05, |
| "learning_rate": 1.4235573488678238e-05, |
| "loss": 0.0, |
| "step": 647000 |
| }, |
| { |
| "epoch": 1.0336408181965482, |
| "grad_norm": 0.0005647933576256037, |
| "learning_rate": 1.4216784386720553e-05, |
| "loss": 0.0, |
| "step": 647500 |
| }, |
| { |
| "epoch": 1.0344389964345377, |
| "grad_norm": 0.0017171679064631462, |
| "learning_rate": 1.419799651692599e-05, |
| "loss": 0.0, |
| "step": 648000 |
| }, |
| { |
| "epoch": 1.0352371746725275, |
| "grad_norm": 0.00015221527428366244, |
| "learning_rate": 1.4179209908851827e-05, |
| "loss": 0.0, |
| "step": 648500 |
| }, |
| { |
| "epoch": 1.036035352910517, |
| "grad_norm": 0.0007889217813499272, |
| "learning_rate": 1.4160424592053353e-05, |
| "loss": 0.0, |
| "step": 649000 |
| }, |
| { |
| "epoch": 1.0368335311485066, |
| "grad_norm": 0.0001243455772055313, |
| "learning_rate": 1.4141640596083822e-05, |
| "loss": 0.0, |
| "step": 649500 |
| }, |
| { |
| "epoch": 1.0376317093864962, |
| "grad_norm": 0.0001685485476627946, |
| "learning_rate": 1.4122857950494433e-05, |
| "loss": 0.0, |
| "step": 650000 |
| }, |
| { |
| "epoch": 1.038429887624486, |
| "grad_norm": 0.0006130459951236844, |
| "learning_rate": 1.4104076684834227e-05, |
| "loss": 0.0, |
| "step": 650500 |
| }, |
| { |
| "epoch": 1.0392280658624755, |
| "grad_norm": 0.0002899725513998419, |
| "learning_rate": 1.4085296828650094e-05, |
| "loss": 0.0, |
| "step": 651000 |
| }, |
| { |
| "epoch": 1.040026244100465, |
| "grad_norm": 0.00017135177040472627, |
| "learning_rate": 1.406651841148671e-05, |
| "loss": 0.0, |
| "step": 651500 |
| }, |
| { |
| "epoch": 1.0408244223384546, |
| "grad_norm": 0.00029842773801647127, |
| "learning_rate": 1.404774146288647e-05, |
| "loss": 0.0, |
| "step": 652000 |
| }, |
| { |
| "epoch": 1.0416226005764444, |
| "grad_norm": 0.00023503368720412254, |
| "learning_rate": 1.4028966012389477e-05, |
| "loss": 0.0, |
| "step": 652500 |
| }, |
| { |
| "epoch": 1.042420778814434, |
| "grad_norm": 0.00017431171727366745, |
| "learning_rate": 1.4010192089533466e-05, |
| "loss": 0.0, |
| "step": 653000 |
| }, |
| { |
| "epoch": 1.0432189570524235, |
| "grad_norm": 0.00014720844046678394, |
| "learning_rate": 1.3991419723853775e-05, |
| "loss": 0.0, |
| "step": 653500 |
| }, |
| { |
| "epoch": 1.044017135290413, |
| "grad_norm": 0.00020027408027090132, |
| "learning_rate": 1.3972648944883288e-05, |
| "loss": 0.0, |
| "step": 654000 |
| }, |
| { |
| "epoch": 1.0448153135284028, |
| "grad_norm": 0.0001721490261843428, |
| "learning_rate": 1.3953879782152388e-05, |
| "loss": 0.0, |
| "step": 654500 |
| }, |
| { |
| "epoch": 1.0456134917663924, |
| "grad_norm": 0.00016651615442242473, |
| "learning_rate": 1.393511226518893e-05, |
| "loss": 0.0, |
| "step": 655000 |
| }, |
| { |
| "epoch": 1.046411670004382, |
| "grad_norm": 0.00025675626238808036, |
| "learning_rate": 1.3916346423518161e-05, |
| "loss": 0.0, |
| "step": 655500 |
| }, |
| { |
| "epoch": 1.0472098482423715, |
| "grad_norm": 0.0014689648523926735, |
| "learning_rate": 1.3897582286662714e-05, |
| "loss": 0.0, |
| "step": 656000 |
| }, |
| { |
| "epoch": 1.0480080264803613, |
| "grad_norm": 0.00023966317530721426, |
| "learning_rate": 1.387881988414252e-05, |
| "loss": 0.0, |
| "step": 656500 |
| }, |
| { |
| "epoch": 1.0488062047183508, |
| "grad_norm": 0.00015946978237479925, |
| "learning_rate": 1.3860059245474792e-05, |
| "loss": 0.0, |
| "step": 657000 |
| }, |
| { |
| "epoch": 1.0496043829563404, |
| "grad_norm": 0.0002671683905646205, |
| "learning_rate": 1.3841300400173968e-05, |
| "loss": 0.0, |
| "step": 657500 |
| }, |
| { |
| "epoch": 1.0504025611943302, |
| "grad_norm": 0.0008995188982225955, |
| "learning_rate": 1.3822543377751657e-05, |
| "loss": 0.0, |
| "step": 658000 |
| }, |
| { |
| "epoch": 1.0512007394323197, |
| "grad_norm": 0.00017635516996961087, |
| "learning_rate": 1.3803788207716616e-05, |
| "loss": 0.0, |
| "step": 658500 |
| }, |
| { |
| "epoch": 1.0519989176703093, |
| "grad_norm": 9.415735985385254e-05, |
| "learning_rate": 1.3785034919574666e-05, |
| "loss": 0.0, |
| "step": 659000 |
| }, |
| { |
| "epoch": 1.0527970959082988, |
| "grad_norm": 0.00011075485235778615, |
| "learning_rate": 1.376628354282869e-05, |
| "loss": 0.0, |
| "step": 659500 |
| }, |
| { |
| "epoch": 1.0535952741462886, |
| "grad_norm": 0.00012287896242924035, |
| "learning_rate": 1.3747534106978547e-05, |
| "loss": 0.0, |
| "step": 660000 |
| }, |
| { |
| "epoch": 1.0543934523842782, |
| "grad_norm": 0.00021028977062087506, |
| "learning_rate": 1.3728786641521046e-05, |
| "loss": 0.0, |
| "step": 660500 |
| }, |
| { |
| "epoch": 1.0551916306222677, |
| "grad_norm": 0.00026298430748283863, |
| "learning_rate": 1.3710041175949905e-05, |
| "loss": 0.0, |
| "step": 661000 |
| }, |
| { |
| "epoch": 1.0559898088602573, |
| "grad_norm": 0.00010695758101064712, |
| "learning_rate": 1.3691297739755685e-05, |
| "loss": 0.0, |
| "step": 661500 |
| }, |
| { |
| "epoch": 1.056787987098247, |
| "grad_norm": 0.00011361335054971278, |
| "learning_rate": 1.3672556362425764e-05, |
| "loss": 0.0, |
| "step": 662000 |
| }, |
| { |
| "epoch": 1.0575861653362366, |
| "grad_norm": 0.00017494260100647807, |
| "learning_rate": 1.3653817073444268e-05, |
| "loss": 0.0, |
| "step": 662500 |
| }, |
| { |
| "epoch": 1.0583843435742262, |
| "grad_norm": 0.0002041016996372491, |
| "learning_rate": 1.3635079902292054e-05, |
| "loss": 0.0, |
| "step": 663000 |
| }, |
| { |
| "epoch": 1.0591825218122157, |
| "grad_norm": 0.0003808980109170079, |
| "learning_rate": 1.3616344878446634e-05, |
| "loss": 0.0, |
| "step": 663500 |
| }, |
| { |
| "epoch": 1.0599807000502055, |
| "grad_norm": 0.00014107560855336487, |
| "learning_rate": 1.3597612031382143e-05, |
| "loss": 0.0, |
| "step": 664000 |
| }, |
| { |
| "epoch": 1.060778878288195, |
| "grad_norm": 0.00017715631111059338, |
| "learning_rate": 1.3578881390569305e-05, |
| "loss": 0.0, |
| "step": 664500 |
| }, |
| { |
| "epoch": 1.0615770565261846, |
| "grad_norm": 0.000133657013066113, |
| "learning_rate": 1.3560152985475353e-05, |
| "loss": 0.0, |
| "step": 665000 |
| }, |
| { |
| "epoch": 1.0623752347641742, |
| "grad_norm": 0.0020608582999557257, |
| "learning_rate": 1.354142684556402e-05, |
| "loss": 0.0, |
| "step": 665500 |
| }, |
| { |
| "epoch": 1.063173413002164, |
| "grad_norm": 9.932918328559026e-05, |
| "learning_rate": 1.3522703000295465e-05, |
| "loss": 0.0, |
| "step": 666000 |
| }, |
| { |
| "epoch": 1.0639715912401535, |
| "grad_norm": 0.00020408285490702838, |
| "learning_rate": 1.3503981479126238e-05, |
| "loss": 0.0, |
| "step": 666500 |
| }, |
| { |
| "epoch": 1.064769769478143, |
| "grad_norm": 0.00017636266420595348, |
| "learning_rate": 1.348526231150924e-05, |
| "loss": 0.0, |
| "step": 667000 |
| }, |
| { |
| "epoch": 1.0655679477161326, |
| "grad_norm": 0.005270059686154127, |
| "learning_rate": 1.3466545526893657e-05, |
| "loss": 0.0, |
| "step": 667500 |
| }, |
| { |
| "epoch": 1.0663661259541224, |
| "grad_norm": 0.00017116556409746408, |
| "learning_rate": 1.3447831154724944e-05, |
| "loss": 0.0, |
| "step": 668000 |
| }, |
| { |
| "epoch": 1.067164304192112, |
| "grad_norm": 0.00020884806872345507, |
| "learning_rate": 1.342911922444474e-05, |
| "loss": 0.0, |
| "step": 668500 |
| }, |
| { |
| "epoch": 1.0679624824301015, |
| "grad_norm": 0.00016449626127723604, |
| "learning_rate": 1.341040976549086e-05, |
| "loss": 0.0, |
| "step": 669000 |
| }, |
| { |
| "epoch": 1.068760660668091, |
| "grad_norm": 0.00012128291564295068, |
| "learning_rate": 1.3391702807297222e-05, |
| "loss": 0.0, |
| "step": 669500 |
| }, |
| { |
| "epoch": 1.0695588389060808, |
| "grad_norm": 0.0004791721876244992, |
| "learning_rate": 1.33729983792938e-05, |
| "loss": 0.0, |
| "step": 670000 |
| }, |
| { |
| "epoch": 1.0703570171440704, |
| "grad_norm": 0.0005541092832572758, |
| "learning_rate": 1.3354296510906615e-05, |
| "loss": 0.0, |
| "step": 670500 |
| }, |
| { |
| "epoch": 1.07115519538206, |
| "grad_norm": 0.22373533248901367, |
| "learning_rate": 1.3335597231557637e-05, |
| "loss": 0.0, |
| "step": 671000 |
| }, |
| { |
| "epoch": 1.0719533736200495, |
| "grad_norm": 0.00018599227769300342, |
| "learning_rate": 1.3316900570664773e-05, |
| "loss": 0.0, |
| "step": 671500 |
| }, |
| { |
| "epoch": 1.0727515518580393, |
| "grad_norm": 0.023390674963593483, |
| "learning_rate": 1.3298206557641807e-05, |
| "loss": 0.0, |
| "step": 672000 |
| }, |
| { |
| "epoch": 1.0735497300960288, |
| "grad_norm": 0.0001526430423837155, |
| "learning_rate": 1.327951522189836e-05, |
| "loss": 0.0, |
| "step": 672500 |
| }, |
| { |
| "epoch": 1.0743479083340184, |
| "grad_norm": 0.0005466815200634301, |
| "learning_rate": 1.3260826592839843e-05, |
| "loss": 0.0, |
| "step": 673000 |
| }, |
| { |
| "epoch": 1.075146086572008, |
| "grad_norm": 0.0001984064292628318, |
| "learning_rate": 1.3242140699867394e-05, |
| "loss": 0.0, |
| "step": 673500 |
| }, |
| { |
| "epoch": 1.0759442648099977, |
| "grad_norm": 0.0001757918653311208, |
| "learning_rate": 1.3223457572377876e-05, |
| "loss": 0.0, |
| "step": 674000 |
| }, |
| { |
| "epoch": 1.0767424430479873, |
| "grad_norm": 0.00047943569370545447, |
| "learning_rate": 1.3204777239763763e-05, |
| "loss": 0.0, |
| "step": 674500 |
| }, |
| { |
| "epoch": 1.0775406212859768, |
| "grad_norm": 0.00018678620108403265, |
| "learning_rate": 1.3186099731413175e-05, |
| "loss": 0.0, |
| "step": 675000 |
| }, |
| { |
| "epoch": 1.0783387995239666, |
| "grad_norm": 0.0001615070941625163, |
| "learning_rate": 1.316742507670975e-05, |
| "loss": 0.0, |
| "step": 675500 |
| }, |
| { |
| "epoch": 1.0791369777619562, |
| "grad_norm": 0.00011800303036579862, |
| "learning_rate": 1.3148753305032651e-05, |
| "loss": 0.0, |
| "step": 676000 |
| }, |
| { |
| "epoch": 1.0799351559999457, |
| "grad_norm": 0.00014160935825202614, |
| "learning_rate": 1.3130084445756528e-05, |
| "loss": 0.0, |
| "step": 676500 |
| }, |
| { |
| "epoch": 1.0807333342379353, |
| "grad_norm": 0.0001385206269333139, |
| "learning_rate": 1.3111418528251405e-05, |
| "loss": 0.0, |
| "step": 677000 |
| }, |
| { |
| "epoch": 1.081531512475925, |
| "grad_norm": 0.0001729640061967075, |
| "learning_rate": 1.309275558188272e-05, |
| "loss": 0.0, |
| "step": 677500 |
| }, |
| { |
| "epoch": 1.0823296907139146, |
| "grad_norm": 0.0074539934284985065, |
| "learning_rate": 1.3074095636011201e-05, |
| "loss": 0.0, |
| "step": 678000 |
| }, |
| { |
| "epoch": 1.0831278689519042, |
| "grad_norm": 0.0001781294122338295, |
| "learning_rate": 1.3055438719992892e-05, |
| "loss": 0.0, |
| "step": 678500 |
| }, |
| { |
| "epoch": 1.0839260471898937, |
| "grad_norm": 0.00014573968655895442, |
| "learning_rate": 1.3036784863179042e-05, |
| "loss": 0.0, |
| "step": 679000 |
| }, |
| { |
| "epoch": 1.0847242254278835, |
| "grad_norm": 0.006077378056943417, |
| "learning_rate": 1.301813409491609e-05, |
| "loss": 0.0, |
| "step": 679500 |
| }, |
| { |
| "epoch": 1.085522403665873, |
| "grad_norm": 0.2760709524154663, |
| "learning_rate": 1.2999486444545635e-05, |
| "loss": 0.0, |
| "step": 680000 |
| }, |
| { |
| "epoch": 1.0863205819038626, |
| "grad_norm": 0.006627124268561602, |
| "learning_rate": 1.2980841941404345e-05, |
| "loss": 0.0, |
| "step": 680500 |
| }, |
| { |
| "epoch": 1.0871187601418522, |
| "grad_norm": 0.0002801245136652142, |
| "learning_rate": 1.2962200614823972e-05, |
| "loss": 0.0, |
| "step": 681000 |
| }, |
| { |
| "epoch": 1.087916938379842, |
| "grad_norm": 0.00034790916834026575, |
| "learning_rate": 1.2943562494131222e-05, |
| "loss": 0.0, |
| "step": 681500 |
| }, |
| { |
| "epoch": 1.0887151166178315, |
| "grad_norm": 0.00035252582165412605, |
| "learning_rate": 1.2924927608647807e-05, |
| "loss": 0.0, |
| "step": 682000 |
| }, |
| { |
| "epoch": 1.089513294855821, |
| "grad_norm": 0.000455355504527688, |
| "learning_rate": 1.2906295987690317e-05, |
| "loss": 0.0, |
| "step": 682500 |
| }, |
| { |
| "epoch": 1.0903114730938106, |
| "grad_norm": 0.0007876930758357048, |
| "learning_rate": 1.2887667660570213e-05, |
| "loss": 0.0, |
| "step": 683000 |
| }, |
| { |
| "epoch": 1.0911096513318004, |
| "grad_norm": 0.00048001037794165313, |
| "learning_rate": 1.2869042656593782e-05, |
| "loss": 0.0, |
| "step": 683500 |
| }, |
| { |
| "epoch": 1.09190782956979, |
| "grad_norm": 0.0004877329047303647, |
| "learning_rate": 1.2850421005062076e-05, |
| "loss": 0.0, |
| "step": 684000 |
| }, |
| { |
| "epoch": 1.0927060078077795, |
| "grad_norm": 0.00021993753034621477, |
| "learning_rate": 1.2831802735270879e-05, |
| "loss": 0.0, |
| "step": 684500 |
| }, |
| { |
| "epoch": 1.093504186045769, |
| "grad_norm": 0.0003302933764643967, |
| "learning_rate": 1.2813187876510645e-05, |
| "loss": 0.0, |
| "step": 685000 |
| }, |
| { |
| "epoch": 1.0943023642837588, |
| "grad_norm": 1113.2105712890625, |
| "learning_rate": 1.2794576458066469e-05, |
| "loss": 0.0, |
| "step": 685500 |
| }, |
| { |
| "epoch": 1.0951005425217484, |
| "grad_norm": 0.0002000959066208452, |
| "learning_rate": 1.2775968509218036e-05, |
| "loss": 0.0, |
| "step": 686000 |
| }, |
| { |
| "epoch": 1.095898720759738, |
| "grad_norm": 0.00015401170821860433, |
| "learning_rate": 1.2757364059239562e-05, |
| "loss": 0.0, |
| "step": 686500 |
| }, |
| { |
| "epoch": 1.0966968989977275, |
| "grad_norm": 0.0001665474846959114, |
| "learning_rate": 1.2738763137399772e-05, |
| "loss": 0.0, |
| "step": 687000 |
| }, |
| { |
| "epoch": 1.0974950772357173, |
| "grad_norm": 0.00014819370699115098, |
| "learning_rate": 1.2720165772961828e-05, |
| "loss": 0.0, |
| "step": 687500 |
| }, |
| { |
| "epoch": 1.0982932554737068, |
| "grad_norm": 0.00019879864703398198, |
| "learning_rate": 1.270157199518331e-05, |
| "loss": 0.0, |
| "step": 688000 |
| }, |
| { |
| "epoch": 1.0990914337116964, |
| "grad_norm": 0.00024383983691222966, |
| "learning_rate": 1.2682981833316138e-05, |
| "loss": 0.0, |
| "step": 688500 |
| }, |
| { |
| "epoch": 1.0998896119496862, |
| "grad_norm": 0.0001553635229356587, |
| "learning_rate": 1.2664395316606553e-05, |
| "loss": 0.0, |
| "step": 689000 |
| }, |
| { |
| "epoch": 1.1006877901876757, |
| "grad_norm": 0.04126366972923279, |
| "learning_rate": 1.2645812474295068e-05, |
| "loss": 0.0, |
| "step": 689500 |
| }, |
| { |
| "epoch": 1.1014859684256653, |
| "grad_norm": 0.0001506950065959245, |
| "learning_rate": 1.2627233335616397e-05, |
| "loss": 0.0, |
| "step": 690000 |
| }, |
| { |
| "epoch": 1.1022841466636548, |
| "grad_norm": 0.00024802706320770085, |
| "learning_rate": 1.260865792979945e-05, |
| "loss": 0.0, |
| "step": 690500 |
| }, |
| { |
| "epoch": 1.1030823249016444, |
| "grad_norm": 0.00022820830054115504, |
| "learning_rate": 1.259008628606724e-05, |
| "loss": 0.0, |
| "step": 691000 |
| }, |
| { |
| "epoch": 1.1038805031396342, |
| "grad_norm": 0.00021335756173357368, |
| "learning_rate": 1.2571518433636885e-05, |
| "loss": 0.0, |
| "step": 691500 |
| }, |
| { |
| "epoch": 1.1046786813776237, |
| "grad_norm": 0.00015809416072443128, |
| "learning_rate": 1.2552954401719521e-05, |
| "loss": 0.0, |
| "step": 692000 |
| }, |
| { |
| "epoch": 1.1054768596156133, |
| "grad_norm": 0.00010023624054156244, |
| "learning_rate": 1.2534394219520282e-05, |
| "loss": 0.0, |
| "step": 692500 |
| }, |
| { |
| "epoch": 1.106275037853603, |
| "grad_norm": 0.0005553970113396645, |
| "learning_rate": 1.2515837916238249e-05, |
| "loss": 0.0, |
| "step": 693000 |
| }, |
| { |
| "epoch": 1.1070732160915926, |
| "grad_norm": 0.00018230122805107385, |
| "learning_rate": 1.2497285521066384e-05, |
| "loss": 0.0, |
| "step": 693500 |
| }, |
| { |
| "epoch": 1.1078713943295821, |
| "grad_norm": 0.00017253353144042194, |
| "learning_rate": 1.2478737063191525e-05, |
| "loss": 0.0, |
| "step": 694000 |
| }, |
| { |
| "epoch": 1.1086695725675717, |
| "grad_norm": 0.00014320742047857493, |
| "learning_rate": 1.2460192571794297e-05, |
| "loss": 0.0, |
| "step": 694500 |
| }, |
| { |
| "epoch": 1.1094677508055615, |
| "grad_norm": 0.0002893557248171419, |
| "learning_rate": 1.2441652076049085e-05, |
| "loss": 0.0, |
| "step": 695000 |
| }, |
| { |
| "epoch": 1.110265929043551, |
| "grad_norm": 0.0001792061812011525, |
| "learning_rate": 1.2423115605124003e-05, |
| "loss": 0.0, |
| "step": 695500 |
| }, |
| { |
| "epoch": 1.1110641072815406, |
| "grad_norm": 0.00013314814714249223, |
| "learning_rate": 1.2404583188180819e-05, |
| "loss": 0.0, |
| "step": 696000 |
| }, |
| { |
| "epoch": 1.1118622855195301, |
| "grad_norm": 0.037688203155994415, |
| "learning_rate": 1.2386054854374931e-05, |
| "loss": 0.0, |
| "step": 696500 |
| }, |
| { |
| "epoch": 1.11266046375752, |
| "grad_norm": 0.009215892292559147, |
| "learning_rate": 1.2367530632855307e-05, |
| "loss": 0.0, |
| "step": 697000 |
| }, |
| { |
| "epoch": 1.1134586419955095, |
| "grad_norm": 0.00022860315220896155, |
| "learning_rate": 1.2349010552764452e-05, |
| "loss": 0.0, |
| "step": 697500 |
| }, |
| { |
| "epoch": 1.114256820233499, |
| "grad_norm": 0.00016303629672620445, |
| "learning_rate": 1.2330494643238355e-05, |
| "loss": 0.0, |
| "step": 698000 |
| }, |
| { |
| "epoch": 1.1150549984714886, |
| "grad_norm": 0.00031790099455974996, |
| "learning_rate": 1.2311982933406434e-05, |
| "loss": 0.0, |
| "step": 698500 |
| }, |
| { |
| "epoch": 1.1158531767094784, |
| "grad_norm": 0.040996525436639786, |
| "learning_rate": 1.2293475452391517e-05, |
| "loss": 0.0, |
| "step": 699000 |
| }, |
| { |
| "epoch": 1.116651354947468, |
| "grad_norm": 0.00023965245054569095, |
| "learning_rate": 1.2274972229309758e-05, |
| "loss": 0.0, |
| "step": 699500 |
| }, |
| { |
| "epoch": 1.1174495331854575, |
| "grad_norm": 0.00019742768199648708, |
| "learning_rate": 1.2256473293270635e-05, |
| "loss": 0.0, |
| "step": 700000 |
| }, |
| { |
| "epoch": 1.118247711423447, |
| "grad_norm": 0.000235430255997926, |
| "learning_rate": 1.2237978673376863e-05, |
| "loss": 0.0, |
| "step": 700500 |
| }, |
| { |
| "epoch": 1.1190458896614368, |
| "grad_norm": 0.0002709669934120029, |
| "learning_rate": 1.2219488398724383e-05, |
| "loss": 0.0, |
| "step": 701000 |
| }, |
| { |
| "epoch": 1.1198440678994264, |
| "grad_norm": 0.00020392390433698893, |
| "learning_rate": 1.2201002498402283e-05, |
| "loss": 0.0, |
| "step": 701500 |
| }, |
| { |
| "epoch": 1.120642246137416, |
| "grad_norm": 0.00025341068976558745, |
| "learning_rate": 1.218252100149278e-05, |
| "loss": 0.0, |
| "step": 702000 |
| }, |
| { |
| "epoch": 1.1214404243754055, |
| "grad_norm": 0.00020573250367306173, |
| "learning_rate": 1.2164043937071166e-05, |
| "loss": 0.0, |
| "step": 702500 |
| }, |
| { |
| "epoch": 1.1222386026133953, |
| "grad_norm": 0.00016997568309307098, |
| "learning_rate": 1.2145571334205747e-05, |
| "loss": 0.0, |
| "step": 703000 |
| }, |
| { |
| "epoch": 1.1230367808513848, |
| "grad_norm": 0.001076328568160534, |
| "learning_rate": 1.2127103221957824e-05, |
| "loss": 0.0, |
| "step": 703500 |
| }, |
| { |
| "epoch": 1.1238349590893744, |
| "grad_norm": 0.0008634846308268607, |
| "learning_rate": 1.210863962938163e-05, |
| "loss": 0.0, |
| "step": 704000 |
| }, |
| { |
| "epoch": 1.124633137327364, |
| "grad_norm": 0.00017522821144666523, |
| "learning_rate": 1.2090180585524273e-05, |
| "loss": 0.0, |
| "step": 704500 |
| }, |
| { |
| "epoch": 1.1254313155653537, |
| "grad_norm": 0.0005584707832895219, |
| "learning_rate": 1.2071726119425731e-05, |
| "loss": 0.0, |
| "step": 705000 |
| }, |
| { |
| "epoch": 1.1262294938033433, |
| "grad_norm": 0.00016125263937283307, |
| "learning_rate": 1.205327626011875e-05, |
| "loss": 0.0, |
| "step": 705500 |
| }, |
| { |
| "epoch": 1.1270276720413328, |
| "grad_norm": 0.00014472042676061392, |
| "learning_rate": 1.2034831036628866e-05, |
| "loss": 0.0, |
| "step": 706000 |
| }, |
| { |
| "epoch": 1.1278258502793226, |
| "grad_norm": 0.0004964773543179035, |
| "learning_rate": 1.2016390477974277e-05, |
| "loss": 0.0, |
| "step": 706500 |
| }, |
| { |
| "epoch": 1.1286240285173121, |
| "grad_norm": 0.00017839822976384312, |
| "learning_rate": 1.1997954613165885e-05, |
| "loss": 0.0, |
| "step": 707000 |
| }, |
| { |
| "epoch": 1.1294222067553017, |
| "grad_norm": 0.00019834449631161988, |
| "learning_rate": 1.1979523471207184e-05, |
| "loss": 0.0, |
| "step": 707500 |
| }, |
| { |
| "epoch": 1.1302203849932912, |
| "grad_norm": 0.00014970562187954783, |
| "learning_rate": 1.196109708109423e-05, |
| "loss": 0.0, |
| "step": 708000 |
| }, |
| { |
| "epoch": 1.1310185632312808, |
| "grad_norm": 0.00013826471695210785, |
| "learning_rate": 1.194267547181563e-05, |
| "loss": 0.0, |
| "step": 708500 |
| }, |
| { |
| "epoch": 1.1318167414692706, |
| "grad_norm": 0.0002837497158907354, |
| "learning_rate": 1.1924258672352443e-05, |
| "loss": 0.0, |
| "step": 709000 |
| }, |
| { |
| "epoch": 1.1326149197072601, |
| "grad_norm": 0.09026394784450531, |
| "learning_rate": 1.190584671167819e-05, |
| "loss": 0.0, |
| "step": 709500 |
| }, |
| { |
| "epoch": 1.1334130979452497, |
| "grad_norm": 0.0003425665490794927, |
| "learning_rate": 1.1887439618758744e-05, |
| "loss": 0.0, |
| "step": 710000 |
| }, |
| { |
| "epoch": 1.1342112761832395, |
| "grad_norm": 0.0004137590294703841, |
| "learning_rate": 1.186903742255236e-05, |
| "loss": 0.0, |
| "step": 710500 |
| }, |
| { |
| "epoch": 1.135009454421229, |
| "grad_norm": 0.0014002566458657384, |
| "learning_rate": 1.1850640152009552e-05, |
| "loss": 0.0, |
| "step": 711000 |
| }, |
| { |
| "epoch": 1.1358076326592186, |
| "grad_norm": 0.00042761804070323706, |
| "learning_rate": 1.18322478360731e-05, |
| "loss": 0.0, |
| "step": 711500 |
| }, |
| { |
| "epoch": 1.1366058108972081, |
| "grad_norm": 0.0002522016875445843, |
| "learning_rate": 1.1813860503678006e-05, |
| "loss": 0.0, |
| "step": 712000 |
| }, |
| { |
| "epoch": 1.137403989135198, |
| "grad_norm": 0.0008443945553153753, |
| "learning_rate": 1.17954781837514e-05, |
| "loss": 0.0, |
| "step": 712500 |
| }, |
| { |
| "epoch": 1.1382021673731875, |
| "grad_norm": 0.018911859020590782, |
| "learning_rate": 1.1777100905212562e-05, |
| "loss": 0.0, |
| "step": 713000 |
| }, |
| { |
| "epoch": 1.139000345611177, |
| "grad_norm": 0.00022410589735955, |
| "learning_rate": 1.1758728696972803e-05, |
| "loss": 0.0, |
| "step": 713500 |
| }, |
| { |
| "epoch": 1.1397985238491666, |
| "grad_norm": 0.00019726462778635323, |
| "learning_rate": 1.1740361587935484e-05, |
| "loss": 0.0, |
| "step": 714000 |
| }, |
| { |
| "epoch": 1.1405967020871564, |
| "grad_norm": 0.00034632792812772095, |
| "learning_rate": 1.1721999606995939e-05, |
| "loss": 0.0, |
| "step": 714500 |
| }, |
| { |
| "epoch": 1.141394880325146, |
| "grad_norm": 0.0001660481939325109, |
| "learning_rate": 1.1703642783041423e-05, |
| "loss": 0.0, |
| "step": 715000 |
| }, |
| { |
| "epoch": 1.1421930585631355, |
| "grad_norm": 0.00020775549637619406, |
| "learning_rate": 1.1685291144951097e-05, |
| "loss": 0.0, |
| "step": 715500 |
| }, |
| { |
| "epoch": 1.142991236801125, |
| "grad_norm": 0.00017729878891259432, |
| "learning_rate": 1.166694472159594e-05, |
| "loss": 0.0, |
| "step": 716000 |
| }, |
| { |
| "epoch": 1.1437894150391148, |
| "grad_norm": 0.00020900214440189302, |
| "learning_rate": 1.1648603541838759e-05, |
| "loss": 0.0, |
| "step": 716500 |
| }, |
| { |
| "epoch": 1.1445875932771044, |
| "grad_norm": 0.00021771031606476754, |
| "learning_rate": 1.1630267634534078e-05, |
| "loss": 0.0, |
| "step": 717000 |
| }, |
| { |
| "epoch": 1.145385771515094, |
| "grad_norm": 0.000251882360316813, |
| "learning_rate": 1.161193702852814e-05, |
| "loss": 0.0, |
| "step": 717500 |
| }, |
| { |
| "epoch": 1.1461839497530835, |
| "grad_norm": 0.00020855554612353444, |
| "learning_rate": 1.1593611752658857e-05, |
| "loss": 0.0, |
| "step": 718000 |
| }, |
| { |
| "epoch": 1.1469821279910732, |
| "grad_norm": 0.00013053267321083695, |
| "learning_rate": 1.1575291835755743e-05, |
| "loss": 0.0, |
| "step": 718500 |
| }, |
| { |
| "epoch": 1.1477803062290628, |
| "grad_norm": 0.00018806445586960763, |
| "learning_rate": 1.155697730663989e-05, |
| "loss": 0.0, |
| "step": 719000 |
| }, |
| { |
| "epoch": 1.1485784844670524, |
| "grad_norm": 0.00016325576871167868, |
| "learning_rate": 1.1538668194123901e-05, |
| "loss": 0.0, |
| "step": 719500 |
| }, |
| { |
| "epoch": 1.1493766627050421, |
| "grad_norm": 0.00024883818696253, |
| "learning_rate": 1.152036452701188e-05, |
| "loss": 0.0, |
| "step": 720000 |
| }, |
| { |
| "epoch": 1.1501748409430317, |
| "grad_norm": 0.0001439152838429436, |
| "learning_rate": 1.1502066334099339e-05, |
| "loss": 0.0, |
| "step": 720500 |
| }, |
| { |
| "epoch": 1.1509730191810212, |
| "grad_norm": 0.010649287141859531, |
| "learning_rate": 1.1483773644173191e-05, |
| "loss": 0.0, |
| "step": 721000 |
| }, |
| { |
| "epoch": 1.1517711974190108, |
| "grad_norm": 0.0003836087416857481, |
| "learning_rate": 1.1465486486011695e-05, |
| "loss": 0.0, |
| "step": 721500 |
| }, |
| { |
| "epoch": 1.1525693756570004, |
| "grad_norm": 0.00016571102605666965, |
| "learning_rate": 1.1447204888384396e-05, |
| "loss": 0.0, |
| "step": 722000 |
| }, |
| { |
| "epoch": 1.1533675538949901, |
| "grad_norm": 0.003242659382522106, |
| "learning_rate": 1.1428928880052106e-05, |
| "loss": 0.0, |
| "step": 722500 |
| }, |
| { |
| "epoch": 1.1541657321329797, |
| "grad_norm": 0.0001464882370783016, |
| "learning_rate": 1.141065848976683e-05, |
| "loss": 0.0, |
| "step": 723000 |
| }, |
| { |
| "epoch": 1.1549639103709692, |
| "grad_norm": 0.0001437840110156685, |
| "learning_rate": 1.1392393746271738e-05, |
| "loss": 0.0, |
| "step": 723500 |
| }, |
| { |
| "epoch": 1.155762088608959, |
| "grad_norm": 9.78041862254031e-05, |
| "learning_rate": 1.1374134678301124e-05, |
| "loss": 0.0, |
| "step": 724000 |
| }, |
| { |
| "epoch": 1.1565602668469486, |
| "grad_norm": 0.01296873390674591, |
| "learning_rate": 1.1355881314580341e-05, |
| "loss": 0.0, |
| "step": 724500 |
| }, |
| { |
| "epoch": 1.1573584450849381, |
| "grad_norm": 0.000577951839659363, |
| "learning_rate": 1.1337633683825783e-05, |
| "loss": 0.0, |
| "step": 725000 |
| }, |
| { |
| "epoch": 1.1581566233229277, |
| "grad_norm": 0.0002495780645404011, |
| "learning_rate": 1.1319391814744808e-05, |
| "loss": 0.0, |
| "step": 725500 |
| }, |
| { |
| "epoch": 1.1589548015609175, |
| "grad_norm": 0.00020137692627031356, |
| "learning_rate": 1.1301155736035725e-05, |
| "loss": 0.0, |
| "step": 726000 |
| }, |
| { |
| "epoch": 1.159752979798907, |
| "grad_norm": 0.0002284547663293779, |
| "learning_rate": 1.1282925476387725e-05, |
| "loss": 0.0, |
| "step": 726500 |
| }, |
| { |
| "epoch": 1.1605511580368966, |
| "grad_norm": 0.0001997579965973273, |
| "learning_rate": 1.1264701064480842e-05, |
| "loss": 0.0, |
| "step": 727000 |
| }, |
| { |
| "epoch": 1.1613493362748861, |
| "grad_norm": 0.00020096104708500206, |
| "learning_rate": 1.1246482528985918e-05, |
| "loss": 0.0, |
| "step": 727500 |
| }, |
| { |
| "epoch": 1.162147514512876, |
| "grad_norm": 0.0002553035446908325, |
| "learning_rate": 1.1228269898564545e-05, |
| "loss": 0.0, |
| "step": 728000 |
| }, |
| { |
| "epoch": 1.1629456927508655, |
| "grad_norm": 0.00024139387824106961, |
| "learning_rate": 1.121006320186903e-05, |
| "loss": 0.0, |
| "step": 728500 |
| }, |
| { |
| "epoch": 1.163743870988855, |
| "grad_norm": 0.0002338308549951762, |
| "learning_rate": 1.1191862467542337e-05, |
| "loss": 0.0, |
| "step": 729000 |
| }, |
| { |
| "epoch": 1.1645420492268446, |
| "grad_norm": 0.00048753820010460913, |
| "learning_rate": 1.117366772421806e-05, |
| "loss": 0.0, |
| "step": 729500 |
| }, |
| { |
| "epoch": 1.1653402274648343, |
| "grad_norm": 0.00029974625795148313, |
| "learning_rate": 1.1155479000520359e-05, |
| "loss": 0.0, |
| "step": 730000 |
| }, |
| { |
| "epoch": 1.166138405702824, |
| "grad_norm": 0.0002745148085523397, |
| "learning_rate": 1.1137296325063923e-05, |
| "loss": 0.0, |
| "step": 730500 |
| }, |
| { |
| "epoch": 1.1669365839408135, |
| "grad_norm": 0.0003195735043846071, |
| "learning_rate": 1.1119119726453938e-05, |
| "loss": 0.0, |
| "step": 731000 |
| }, |
| { |
| "epoch": 1.167734762178803, |
| "grad_norm": 0.0003031744563486427, |
| "learning_rate": 1.1100949233286018e-05, |
| "loss": 0.0, |
| "step": 731500 |
| }, |
| { |
| "epoch": 1.1685329404167928, |
| "grad_norm": 0.0005830961745232344, |
| "learning_rate": 1.1082784874146175e-05, |
| "loss": 0.0, |
| "step": 732000 |
| }, |
| { |
| "epoch": 1.1693311186547823, |
| "grad_norm": 0.0003262453246861696, |
| "learning_rate": 1.1064626677610778e-05, |
| "loss": 0.0, |
| "step": 732500 |
| }, |
| { |
| "epoch": 1.170129296892772, |
| "grad_norm": 0.00045430276077240705, |
| "learning_rate": 1.1046474672246483e-05, |
| "loss": 0.0, |
| "step": 733000 |
| }, |
| { |
| "epoch": 1.1709274751307617, |
| "grad_norm": 0.0004028049297630787, |
| "learning_rate": 1.1028328886610229e-05, |
| "loss": 0.0, |
| "step": 733500 |
| }, |
| { |
| "epoch": 1.1717256533687512, |
| "grad_norm": 0.00026213665842078626, |
| "learning_rate": 1.101018934924915e-05, |
| "loss": 0.0, |
| "step": 734000 |
| }, |
| { |
| "epoch": 1.1725238316067408, |
| "grad_norm": 0.00020008819410577416, |
| "learning_rate": 1.099205608870057e-05, |
| "loss": 0.0, |
| "step": 734500 |
| }, |
| { |
| "epoch": 1.1733220098447303, |
| "grad_norm": 0.0004212880157865584, |
| "learning_rate": 1.0973929133491912e-05, |
| "loss": 0.0, |
| "step": 735000 |
| }, |
| { |
| "epoch": 1.17412018808272, |
| "grad_norm": 0.00021266612748149782, |
| "learning_rate": 1.0955808512140709e-05, |
| "loss": 0.0, |
| "step": 735500 |
| }, |
| { |
| "epoch": 1.1749183663207097, |
| "grad_norm": 0.00032644724706187844, |
| "learning_rate": 1.093769425315451e-05, |
| "loss": 0.0, |
| "step": 736000 |
| }, |
| { |
| "epoch": 1.1757165445586992, |
| "grad_norm": 0.09483543783426285, |
| "learning_rate": 1.0919586385030849e-05, |
| "loss": 0.0, |
| "step": 736500 |
| }, |
| { |
| "epoch": 1.1765147227966888, |
| "grad_norm": 0.00034892070107162, |
| "learning_rate": 1.0901484936257235e-05, |
| "loss": 0.0, |
| "step": 737000 |
| }, |
| { |
| "epoch": 1.1773129010346786, |
| "grad_norm": 0.00021350174210965633, |
| "learning_rate": 1.0883389935311041e-05, |
| "loss": 0.0, |
| "step": 737500 |
| }, |
| { |
| "epoch": 1.1781110792726681, |
| "grad_norm": 0.00020187548943795264, |
| "learning_rate": 1.086530141065953e-05, |
| "loss": 0.0, |
| "step": 738000 |
| }, |
| { |
| "epoch": 1.1789092575106577, |
| "grad_norm": 0.0004036907048430294, |
| "learning_rate": 1.0847219390759752e-05, |
| "loss": 0.0, |
| "step": 738500 |
| }, |
| { |
| "epoch": 1.1797074357486472, |
| "grad_norm": 0.0001676503597991541, |
| "learning_rate": 1.082914390405854e-05, |
| "loss": 0.0, |
| "step": 739000 |
| }, |
| { |
| "epoch": 1.1805056139866368, |
| "grad_norm": 0.0003050376835744828, |
| "learning_rate": 1.0811074978992437e-05, |
| "loss": 0.0, |
| "step": 739500 |
| }, |
| { |
| "epoch": 1.1813037922246266, |
| "grad_norm": 0.0001976622297661379, |
| "learning_rate": 1.0793012643987662e-05, |
| "loss": 0.0, |
| "step": 740000 |
| }, |
| { |
| "epoch": 1.1821019704626161, |
| "grad_norm": 0.00024282137746922672, |
| "learning_rate": 1.0774956927460085e-05, |
| "loss": 0.0, |
| "step": 740500 |
| }, |
| { |
| "epoch": 1.1829001487006057, |
| "grad_norm": 0.00012517427967395633, |
| "learning_rate": 1.0756907857815136e-05, |
| "loss": 0.0, |
| "step": 741000 |
| }, |
| { |
| "epoch": 1.1836983269385954, |
| "grad_norm": 0.00019520529895089567, |
| "learning_rate": 1.0738865463447822e-05, |
| "loss": 0.0, |
| "step": 741500 |
| }, |
| { |
| "epoch": 1.184496505176585, |
| "grad_norm": 0.0006624461966566741, |
| "learning_rate": 1.0720829772742615e-05, |
| "loss": 0.0, |
| "step": 742000 |
| }, |
| { |
| "epoch": 1.1852946834145746, |
| "grad_norm": 0.00021578549058176577, |
| "learning_rate": 1.070280081407345e-05, |
| "loss": 0.0, |
| "step": 742500 |
| }, |
| { |
| "epoch": 1.1860928616525641, |
| "grad_norm": 2.1622965335845947, |
| "learning_rate": 1.0684778615803701e-05, |
| "loss": 0.0, |
| "step": 743000 |
| }, |
| { |
| "epoch": 1.186891039890554, |
| "grad_norm": 0.0002855357888620347, |
| "learning_rate": 1.0666763206286051e-05, |
| "loss": 0.0, |
| "step": 743500 |
| }, |
| { |
| "epoch": 1.1876892181285434, |
| "grad_norm": 0.00014283708878792822, |
| "learning_rate": 1.064875461386256e-05, |
| "loss": 0.0, |
| "step": 744000 |
| }, |
| { |
| "epoch": 1.188487396366533, |
| "grad_norm": 0.00024032694636844099, |
| "learning_rate": 1.0630752866864518e-05, |
| "loss": 0.0, |
| "step": 744500 |
| }, |
| { |
| "epoch": 1.1892855746045226, |
| "grad_norm": 0.00016632409824524075, |
| "learning_rate": 1.0612757993612478e-05, |
| "loss": 0.0, |
| "step": 745000 |
| }, |
| { |
| "epoch": 1.1900837528425123, |
| "grad_norm": 0.00023169444466475397, |
| "learning_rate": 1.059477002241616e-05, |
| "loss": 0.0, |
| "step": 745500 |
| }, |
| { |
| "epoch": 1.1908819310805019, |
| "grad_norm": 0.0002732094144448638, |
| "learning_rate": 1.0576788981574428e-05, |
| "loss": 0.0, |
| "step": 746000 |
| }, |
| { |
| "epoch": 1.1916801093184914, |
| "grad_norm": 0.0003594690060708672, |
| "learning_rate": 1.055881489937525e-05, |
| "loss": 0.0, |
| "step": 746500 |
| }, |
| { |
| "epoch": 1.192478287556481, |
| "grad_norm": 0.01778334006667137, |
| "learning_rate": 1.0540847804095639e-05, |
| "loss": 0.0, |
| "step": 747000 |
| }, |
| { |
| "epoch": 1.1932764657944708, |
| "grad_norm": 0.00024451143690384924, |
| "learning_rate": 1.0522887724001632e-05, |
| "loss": 0.0, |
| "step": 747500 |
| }, |
| { |
| "epoch": 1.1940746440324603, |
| "grad_norm": 0.0002816928317770362, |
| "learning_rate": 1.0504934687348198e-05, |
| "loss": 0.0, |
| "step": 748000 |
| }, |
| { |
| "epoch": 1.1948728222704499, |
| "grad_norm": 0.00029887750861234963, |
| "learning_rate": 1.048698872237927e-05, |
| "loss": 0.0, |
| "step": 748500 |
| }, |
| { |
| "epoch": 1.1956710005084394, |
| "grad_norm": 0.000365947576938197, |
| "learning_rate": 1.0469049857327611e-05, |
| "loss": 0.0, |
| "step": 749000 |
| }, |
| { |
| "epoch": 1.1964691787464292, |
| "grad_norm": 0.00027725560357794166, |
| "learning_rate": 1.0451118120414837e-05, |
| "loss": 0.0, |
| "step": 749500 |
| }, |
| { |
| "epoch": 1.1972673569844188, |
| "grad_norm": 0.00019809386867564172, |
| "learning_rate": 1.0433193539851356e-05, |
| "loss": 0.0, |
| "step": 750000 |
| }, |
| { |
| "epoch": 1.1972673569844188, |
| "eval_loss": 1.1492022167658433e-05, |
| "eval_runtime": 22209.8831, |
| "eval_samples_per_second": 100.284, |
| "eval_steps_per_second": 3.134, |
| "step": 750000 |
| }, |
| { |
| "epoch": 1.1980655352224083, |
| "grad_norm": 0.24246802926063538, |
| "learning_rate": 1.0415276143836297e-05, |
| "loss": 0.0, |
| "step": 750500 |
| }, |
| { |
| "epoch": 1.198863713460398, |
| "grad_norm": 0.00016463996144011617, |
| "learning_rate": 1.0397365960557508e-05, |
| "loss": 0.0, |
| "step": 751000 |
| }, |
| { |
| "epoch": 1.1996618916983877, |
| "grad_norm": 0.00020477671932894737, |
| "learning_rate": 1.0379463018191474e-05, |
| "loss": 0.0, |
| "step": 751500 |
| }, |
| { |
| "epoch": 1.2004600699363772, |
| "grad_norm": 0.0001754205150064081, |
| "learning_rate": 1.0361567344903292e-05, |
| "loss": 0.0, |
| "step": 752000 |
| }, |
| { |
| "epoch": 1.2012582481743668, |
| "grad_norm": 76.75057220458984, |
| "learning_rate": 1.0343678968846633e-05, |
| "loss": 0.0, |
| "step": 752500 |
| }, |
| { |
| "epoch": 1.2020564264123563, |
| "grad_norm": 0.00011788753909058869, |
| "learning_rate": 1.0325797918163671e-05, |
| "loss": 0.0, |
| "step": 753000 |
| }, |
| { |
| "epoch": 1.202854604650346, |
| "grad_norm": 0.00018122825713362545, |
| "learning_rate": 1.030792422098507e-05, |
| "loss": 0.0, |
| "step": 753500 |
| }, |
| { |
| "epoch": 1.2036527828883357, |
| "grad_norm": 0.00012735063501168042, |
| "learning_rate": 1.029005790542992e-05, |
| "loss": 0.0, |
| "step": 754000 |
| }, |
| { |
| "epoch": 1.2044509611263252, |
| "grad_norm": 0.0020100900437682867, |
| "learning_rate": 1.0272198999605701e-05, |
| "loss": 0.0, |
| "step": 754500 |
| }, |
| { |
| "epoch": 1.205249139364315, |
| "grad_norm": 0.004669174086302519, |
| "learning_rate": 1.025434753160823e-05, |
| "loss": 0.0, |
| "step": 755000 |
| }, |
| { |
| "epoch": 1.2060473176023045, |
| "grad_norm": 0.00020723527995869517, |
| "learning_rate": 1.0236503529521623e-05, |
| "loss": 0.0, |
| "step": 755500 |
| }, |
| { |
| "epoch": 1.206845495840294, |
| "grad_norm": 0.00024396587105002254, |
| "learning_rate": 1.021866702141826e-05, |
| "loss": 0.0, |
| "step": 756000 |
| }, |
| { |
| "epoch": 1.2076436740782837, |
| "grad_norm": 0.0006310238968580961, |
| "learning_rate": 1.0200838035358719e-05, |
| "loss": 0.0, |
| "step": 756500 |
| }, |
| { |
| "epoch": 1.2084418523162732, |
| "grad_norm": 0.00026260962476953864, |
| "learning_rate": 1.0183016599391756e-05, |
| "loss": 0.0, |
| "step": 757000 |
| }, |
| { |
| "epoch": 1.209240030554263, |
| "grad_norm": 0.0001703925954643637, |
| "learning_rate": 1.0165202741554238e-05, |
| "loss": 0.0, |
| "step": 757500 |
| }, |
| { |
| "epoch": 1.2100382087922525, |
| "grad_norm": 0.0003848731575999409, |
| "learning_rate": 1.014739648987112e-05, |
| "loss": 0.0, |
| "step": 758000 |
| }, |
| { |
| "epoch": 1.210836387030242, |
| "grad_norm": 0.00011386480764485896, |
| "learning_rate": 1.0129597872355384e-05, |
| "loss": 0.0, |
| "step": 758500 |
| }, |
| { |
| "epoch": 1.2116345652682319, |
| "grad_norm": 0.0001352078834315762, |
| "learning_rate": 1.0111806917008004e-05, |
| "loss": 0.0, |
| "step": 759000 |
| }, |
| { |
| "epoch": 1.2124327435062214, |
| "grad_norm": 7.882779755163938e-05, |
| "learning_rate": 1.00940236518179e-05, |
| "loss": 0.0, |
| "step": 759500 |
| }, |
| { |
| "epoch": 1.213230921744211, |
| "grad_norm": 0.0001364546042168513, |
| "learning_rate": 1.0076248104761892e-05, |
| "loss": 0.0, |
| "step": 760000 |
| }, |
| { |
| "epoch": 1.2140290999822005, |
| "grad_norm": 0.00026103860000148416, |
| "learning_rate": 1.0058480303804666e-05, |
| "loss": 0.0, |
| "step": 760500 |
| }, |
| { |
| "epoch": 1.2148272782201903, |
| "grad_norm": 0.00013641221448779106, |
| "learning_rate": 1.0040720276898708e-05, |
| "loss": 0.0, |
| "step": 761000 |
| }, |
| { |
| "epoch": 1.2156254564581799, |
| "grad_norm": 0.0008844132535159588, |
| "learning_rate": 1.0022968051984282e-05, |
| "loss": 0.0, |
| "step": 761500 |
| }, |
| { |
| "epoch": 1.2164236346961694, |
| "grad_norm": 0.00033063263981603086, |
| "learning_rate": 1.0005223656989379e-05, |
| "loss": 0.0, |
| "step": 762000 |
| }, |
| { |
| "epoch": 1.217221812934159, |
| "grad_norm": 0.00022558389173354954, |
| "learning_rate": 9.98748711982967e-06, |
| "loss": 0.0, |
| "step": 762500 |
| }, |
| { |
| "epoch": 1.2180199911721488, |
| "grad_norm": 0.0001900517090689391, |
| "learning_rate": 9.969758468408462e-06, |
| "loss": 0.0, |
| "step": 763000 |
| }, |
| { |
| "epoch": 1.2188181694101383, |
| "grad_norm": 0.00016768294153735042, |
| "learning_rate": 9.952037730616658e-06, |
| "loss": 0.0, |
| "step": 763500 |
| }, |
| { |
| "epoch": 1.2196163476481279, |
| "grad_norm": 0.0002104245504597202, |
| "learning_rate": 9.934324934332713e-06, |
| "loss": 0.0, |
| "step": 764000 |
| }, |
| { |
| "epoch": 1.2204145258861174, |
| "grad_norm": 0.00023655268887523562, |
| "learning_rate": 9.916620107422582e-06, |
| "loss": 0.0, |
| "step": 764500 |
| }, |
| { |
| "epoch": 1.2212127041241072, |
| "grad_norm": 0.0002007113944273442, |
| "learning_rate": 9.89892327773969e-06, |
| "loss": 0.0, |
| "step": 765000 |
| }, |
| { |
| "epoch": 1.2220108823620968, |
| "grad_norm": 9.15752124786377, |
| "learning_rate": 9.881234473124877e-06, |
| "loss": 0.0, |
| "step": 765500 |
| }, |
| { |
| "epoch": 1.2228090606000863, |
| "grad_norm": 0.0015039691934362054, |
| "learning_rate": 9.863553721406356e-06, |
| "loss": 0.0, |
| "step": 766000 |
| }, |
| { |
| "epoch": 1.2236072388380759, |
| "grad_norm": 0.00017231931269634515, |
| "learning_rate": 9.845881050399678e-06, |
| "loss": 0.0, |
| "step": 766500 |
| }, |
| { |
| "epoch": 1.2244054170760656, |
| "grad_norm": 0.00019999749201815575, |
| "learning_rate": 9.828216487907672e-06, |
| "loss": 0.0, |
| "step": 767000 |
| }, |
| { |
| "epoch": 1.2252035953140552, |
| "grad_norm": 0.0002266662777401507, |
| "learning_rate": 9.810560061720419e-06, |
| "loss": 0.0, |
| "step": 767500 |
| }, |
| { |
| "epoch": 1.2260017735520448, |
| "grad_norm": 0.08027360588312149, |
| "learning_rate": 9.792911799615198e-06, |
| "loss": 0.0, |
| "step": 768000 |
| }, |
| { |
| "epoch": 1.2267999517900345, |
| "grad_norm": 0.00035596557427197695, |
| "learning_rate": 9.775271729356429e-06, |
| "loss": 0.0, |
| "step": 768500 |
| }, |
| { |
| "epoch": 1.227598130028024, |
| "grad_norm": 0.0004455151502043009, |
| "learning_rate": 9.757639878695674e-06, |
| "loss": 0.0, |
| "step": 769000 |
| }, |
| { |
| "epoch": 1.2283963082660136, |
| "grad_norm": 0.00016969860007520765, |
| "learning_rate": 9.74001627537154e-06, |
| "loss": 0.0, |
| "step": 769500 |
| }, |
| { |
| "epoch": 1.2291944865040032, |
| "grad_norm": 0.0002785904798656702, |
| "learning_rate": 9.72240094710967e-06, |
| "loss": 0.0, |
| "step": 770000 |
| }, |
| { |
| "epoch": 1.2299926647419928, |
| "grad_norm": 0.00011213342804694548, |
| "learning_rate": 9.704793921622687e-06, |
| "loss": 0.0, |
| "step": 770500 |
| }, |
| { |
| "epoch": 1.2307908429799825, |
| "grad_norm": 0.00030834253993816674, |
| "learning_rate": 9.68719522661014e-06, |
| "loss": 0.0, |
| "step": 771000 |
| }, |
| { |
| "epoch": 1.231589021217972, |
| "grad_norm": 0.0002819538349285722, |
| "learning_rate": 9.6696048897585e-06, |
| "loss": 0.0, |
| "step": 771500 |
| }, |
| { |
| "epoch": 1.2323871994559616, |
| "grad_norm": 0.00018688519776333123, |
| "learning_rate": 9.652022938741049e-06, |
| "loss": 0.0, |
| "step": 772000 |
| }, |
| { |
| "epoch": 1.2331853776939514, |
| "grad_norm": 0.00018685254326555878, |
| "learning_rate": 9.634449401217926e-06, |
| "loss": 0.0, |
| "step": 772500 |
| }, |
| { |
| "epoch": 1.233983555931941, |
| "grad_norm": 0.00023071758914738894, |
| "learning_rate": 9.616884304835981e-06, |
| "loss": 0.0, |
| "step": 773000 |
| }, |
| { |
| "epoch": 1.2347817341699305, |
| "grad_norm": 0.0003963226336054504, |
| "learning_rate": 9.59932767722883e-06, |
| "loss": 0.0, |
| "step": 773500 |
| }, |
| { |
| "epoch": 1.23557991240792, |
| "grad_norm": 0.0005826257402077317, |
| "learning_rate": 9.581779546016741e-06, |
| "loss": 0.0, |
| "step": 774000 |
| }, |
| { |
| "epoch": 1.2363780906459099, |
| "grad_norm": 0.0002092513459501788, |
| "learning_rate": 9.56423993880661e-06, |
| "loss": 0.0, |
| "step": 774500 |
| }, |
| { |
| "epoch": 1.2371762688838994, |
| "grad_norm": 0.00026280272868461907, |
| "learning_rate": 9.546708883191948e-06, |
| "loss": 0.0, |
| "step": 775000 |
| }, |
| { |
| "epoch": 1.237974447121889, |
| "grad_norm": 0.0003001219010911882, |
| "learning_rate": 9.529186406752782e-06, |
| "loss": 0.0, |
| "step": 775500 |
| }, |
| { |
| "epoch": 1.2387726253598785, |
| "grad_norm": 0.002450470346957445, |
| "learning_rate": 9.511672537055676e-06, |
| "loss": 0.0, |
| "step": 776000 |
| }, |
| { |
| "epoch": 1.2395708035978683, |
| "grad_norm": 0.004587731324136257, |
| "learning_rate": 9.494167301653618e-06, |
| "loss": 0.0, |
| "step": 776500 |
| }, |
| { |
| "epoch": 1.2403689818358579, |
| "grad_norm": 0.00031009313534013927, |
| "learning_rate": 9.47667072808605e-06, |
| "loss": 0.0, |
| "step": 777000 |
| }, |
| { |
| "epoch": 1.2411671600738474, |
| "grad_norm": 0.5249637365341187, |
| "learning_rate": 9.459182843878752e-06, |
| "loss": 0.0, |
| "step": 777500 |
| }, |
| { |
| "epoch": 1.241965338311837, |
| "grad_norm": 0.00017867004498839378, |
| "learning_rate": 9.441703676543848e-06, |
| "loss": 0.0, |
| "step": 778000 |
| }, |
| { |
| "epoch": 1.2427635165498268, |
| "grad_norm": 0.0002787476987577975, |
| "learning_rate": 9.424233253579762e-06, |
| "loss": 0.0, |
| "step": 778500 |
| }, |
| { |
| "epoch": 1.2435616947878163, |
| "grad_norm": 0.00018374405044596642, |
| "learning_rate": 9.406771602471137e-06, |
| "loss": 0.0, |
| "step": 779000 |
| }, |
| { |
| "epoch": 1.2443598730258059, |
| "grad_norm": 0.0002696373558137566, |
| "learning_rate": 9.38931875068884e-06, |
| "loss": 0.0, |
| "step": 779500 |
| }, |
| { |
| "epoch": 1.2451580512637954, |
| "grad_norm": 0.00013673820649273694, |
| "learning_rate": 9.371874725689875e-06, |
| "loss": 0.0, |
| "step": 780000 |
| }, |
| { |
| "epoch": 1.2459562295017852, |
| "grad_norm": 0.0001669849589234218, |
| "learning_rate": 9.354439554917364e-06, |
| "loss": 0.0, |
| "step": 780500 |
| }, |
| { |
| "epoch": 1.2467544077397748, |
| "grad_norm": 0.00021636247402057052, |
| "learning_rate": 9.33701326580051e-06, |
| "loss": 0.0, |
| "step": 781000 |
| }, |
| { |
| "epoch": 1.2475525859777643, |
| "grad_norm": 0.00017172202933579683, |
| "learning_rate": 9.319595885754533e-06, |
| "loss": 0.0, |
| "step": 781500 |
| }, |
| { |
| "epoch": 1.248350764215754, |
| "grad_norm": 0.00012820841220673174, |
| "learning_rate": 9.302187442180641e-06, |
| "loss": 0.0, |
| "step": 782000 |
| }, |
| { |
| "epoch": 1.2491489424537436, |
| "grad_norm": 0.00019042339408770204, |
| "learning_rate": 9.28478796246598e-06, |
| "loss": 0.0, |
| "step": 782500 |
| }, |
| { |
| "epoch": 1.2499471206917332, |
| "grad_norm": 0.00022117479238659143, |
| "learning_rate": 9.267397473983602e-06, |
| "loss": 0.0, |
| "step": 783000 |
| }, |
| { |
| "epoch": 1.2507452989297227, |
| "grad_norm": 0.00012226690887473524, |
| "learning_rate": 9.250016004092404e-06, |
| "loss": 0.0, |
| "step": 783500 |
| }, |
| { |
| "epoch": 1.2515434771677123, |
| "grad_norm": 0.00023977088858373463, |
| "learning_rate": 9.232643580137095e-06, |
| "loss": 0.0, |
| "step": 784000 |
| }, |
| { |
| "epoch": 1.252341655405702, |
| "grad_norm": 0.00018954268307425082, |
| "learning_rate": 9.215280229448168e-06, |
| "loss": 0.0, |
| "step": 784500 |
| }, |
| { |
| "epoch": 1.2531398336436916, |
| "grad_norm": 0.0015390360495075583, |
| "learning_rate": 9.197925979341817e-06, |
| "loss": 0.0, |
| "step": 785000 |
| }, |
| { |
| "epoch": 1.2539380118816812, |
| "grad_norm": 0.0001842692872742191, |
| "learning_rate": 9.180580857119946e-06, |
| "loss": 0.0, |
| "step": 785500 |
| }, |
| { |
| "epoch": 1.254736190119671, |
| "grad_norm": 0.006291312165558338, |
| "learning_rate": 9.163244890070076e-06, |
| "loss": 0.0, |
| "step": 786000 |
| }, |
| { |
| "epoch": 1.2555343683576605, |
| "grad_norm": 0.00022410901146940887, |
| "learning_rate": 9.145918105465339e-06, |
| "loss": 0.0, |
| "step": 786500 |
| }, |
| { |
| "epoch": 1.25633254659565, |
| "grad_norm": 0.00015199794142972678, |
| "learning_rate": 9.128600530564417e-06, |
| "loss": 0.0, |
| "step": 787000 |
| }, |
| { |
| "epoch": 1.2571307248336396, |
| "grad_norm": 0.00016999320359900594, |
| "learning_rate": 9.1112921926115e-06, |
| "loss": 0.0, |
| "step": 787500 |
| }, |
| { |
| "epoch": 1.2579289030716292, |
| "grad_norm": 0.00017655811097938567, |
| "learning_rate": 9.09399311883625e-06, |
| "loss": 0.0, |
| "step": 788000 |
| }, |
| { |
| "epoch": 1.258727081309619, |
| "grad_norm": 0.00023002490343060344, |
| "learning_rate": 9.07670333645375e-06, |
| "loss": 0.0, |
| "step": 788500 |
| }, |
| { |
| "epoch": 1.2595252595476085, |
| "grad_norm": 0.00012944928312208503, |
| "learning_rate": 9.059422872664476e-06, |
| "loss": 0.0, |
| "step": 789000 |
| }, |
| { |
| "epoch": 1.260323437785598, |
| "grad_norm": 14.440518379211426, |
| "learning_rate": 9.042151754654239e-06, |
| "loss": 0.0, |
| "step": 789500 |
| }, |
| { |
| "epoch": 1.2611216160235879, |
| "grad_norm": 0.00011326325329719111, |
| "learning_rate": 9.024890009594134e-06, |
| "loss": 0.0, |
| "step": 790000 |
| }, |
| { |
| "epoch": 1.2619197942615774, |
| "grad_norm": 0.0001579568488523364, |
| "learning_rate": 9.00763766464053e-06, |
| "loss": 0.0, |
| "step": 790500 |
| }, |
| { |
| "epoch": 1.262717972499567, |
| "grad_norm": 5.766981601715088, |
| "learning_rate": 8.990394746935e-06, |
| "loss": 0.0, |
| "step": 791000 |
| }, |
| { |
| "epoch": 1.2635161507375565, |
| "grad_norm": 0.00011029910092474893, |
| "learning_rate": 8.97316128360428e-06, |
| "loss": 0.0, |
| "step": 791500 |
| }, |
| { |
| "epoch": 1.264314328975546, |
| "grad_norm": 0.00014391505101229995, |
| "learning_rate": 8.955937301760239e-06, |
| "loss": 0.0, |
| "step": 792000 |
| }, |
| { |
| "epoch": 1.2651125072135359, |
| "grad_norm": 0.0001435024314559996, |
| "learning_rate": 8.938722828499834e-06, |
| "loss": 0.0, |
| "step": 792500 |
| }, |
| { |
| "epoch": 1.2659106854515254, |
| "grad_norm": 0.00015525566413998604, |
| "learning_rate": 8.921517890905052e-06, |
| "loss": 0.0, |
| "step": 793000 |
| }, |
| { |
| "epoch": 1.266708863689515, |
| "grad_norm": 0.00010429436224512756, |
| "learning_rate": 8.90432251604288e-06, |
| "loss": 0.0, |
| "step": 793500 |
| }, |
| { |
| "epoch": 1.2675070419275047, |
| "grad_norm": 0.00012090901145711541, |
| "learning_rate": 8.887136730965275e-06, |
| "loss": 0.0, |
| "step": 794000 |
| }, |
| { |
| "epoch": 1.2683052201654943, |
| "grad_norm": 0.00014295458095148206, |
| "learning_rate": 8.869960562709083e-06, |
| "loss": 0.0, |
| "step": 794500 |
| }, |
| { |
| "epoch": 1.2691033984034839, |
| "grad_norm": 0.00015002151485532522, |
| "learning_rate": 8.852794038296048e-06, |
| "loss": 0.0, |
| "step": 795000 |
| }, |
| { |
| "epoch": 1.2699015766414736, |
| "grad_norm": 0.00012087346840417013, |
| "learning_rate": 8.835637184732717e-06, |
| "loss": 0.0, |
| "step": 795500 |
| }, |
| { |
| "epoch": 1.2706997548794632, |
| "grad_norm": 0.00017985192243941128, |
| "learning_rate": 8.818490029010444e-06, |
| "loss": 0.0, |
| "step": 796000 |
| }, |
| { |
| "epoch": 1.2714979331174527, |
| "grad_norm": 8.896778308553621e-05, |
| "learning_rate": 8.80135259810531e-06, |
| "loss": 0.0, |
| "step": 796500 |
| }, |
| { |
| "epoch": 1.2722961113554423, |
| "grad_norm": 0.00011164528405060992, |
| "learning_rate": 8.784224918978105e-06, |
| "loss": 0.0, |
| "step": 797000 |
| }, |
| { |
| "epoch": 1.2730942895934318, |
| "grad_norm": 533.9027709960938, |
| "learning_rate": 8.767107018574276e-06, |
| "loss": 0.0, |
| "step": 797500 |
| }, |
| { |
| "epoch": 1.2738924678314216, |
| "grad_norm": 0.00014446699060499668, |
| "learning_rate": 8.749998923823887e-06, |
| "loss": 0.0, |
| "step": 798000 |
| }, |
| { |
| "epoch": 1.2746906460694112, |
| "grad_norm": 0.04612082615494728, |
| "learning_rate": 8.732900661641568e-06, |
| "loss": 0.0, |
| "step": 798500 |
| }, |
| { |
| "epoch": 1.2754888243074007, |
| "grad_norm": 0.00155142811127007, |
| "learning_rate": 8.715812258926501e-06, |
| "loss": 0.0, |
| "step": 799000 |
| }, |
| { |
| "epoch": 1.2762870025453905, |
| "grad_norm": 0.0004437122552189976, |
| "learning_rate": 8.698733742562327e-06, |
| "loss": 0.0, |
| "step": 799500 |
| }, |
| { |
| "epoch": 1.27708518078338, |
| "grad_norm": 0.00015456078108400106, |
| "learning_rate": 8.681665139417154e-06, |
| "loss": 0.0, |
| "step": 800000 |
| }, |
| { |
| "epoch": 1.2778833590213696, |
| "grad_norm": 0.00022585850092582405, |
| "learning_rate": 8.66460647634349e-06, |
| "loss": 0.0, |
| "step": 800500 |
| }, |
| { |
| "epoch": 1.2786815372593592, |
| "grad_norm": 0.0011763167567551136, |
| "learning_rate": 8.647557780178216e-06, |
| "loss": 0.0, |
| "step": 801000 |
| }, |
| { |
| "epoch": 1.2794797154973487, |
| "grad_norm": 0.00016555427282582968, |
| "learning_rate": 8.630519077742505e-06, |
| "loss": 0.0, |
| "step": 801500 |
| }, |
| { |
| "epoch": 1.2802778937353385, |
| "grad_norm": 0.11920250207185745, |
| "learning_rate": 8.613490395841833e-06, |
| "loss": 0.0, |
| "step": 802000 |
| }, |
| { |
| "epoch": 1.281076071973328, |
| "grad_norm": 0.00025985552929341793, |
| "learning_rate": 8.596471761265905e-06, |
| "loss": 0.0, |
| "step": 802500 |
| }, |
| { |
| "epoch": 1.2818742502113176, |
| "grad_norm": 0.00033153867116197944, |
| "learning_rate": 8.57946320078861e-06, |
| "loss": 0.0, |
| "step": 803000 |
| }, |
| { |
| "epoch": 1.2826724284493074, |
| "grad_norm": 0.0003908054204657674, |
| "learning_rate": 8.562464741168003e-06, |
| "loss": 0.0, |
| "step": 803500 |
| }, |
| { |
| "epoch": 1.283470606687297, |
| "grad_norm": 0.00017760110495146364, |
| "learning_rate": 8.545476409146235e-06, |
| "loss": 0.0, |
| "step": 804000 |
| }, |
| { |
| "epoch": 1.2842687849252865, |
| "grad_norm": 0.00015641027130186558, |
| "learning_rate": 8.528498231449543e-06, |
| "loss": 0.0, |
| "step": 804500 |
| }, |
| { |
| "epoch": 1.285066963163276, |
| "grad_norm": 0.00013975071487948298, |
| "learning_rate": 8.511530234788162e-06, |
| "loss": 0.0, |
| "step": 805000 |
| }, |
| { |
| "epoch": 1.2858651414012656, |
| "grad_norm": 0.0007206627633422613, |
| "learning_rate": 8.494572445856327e-06, |
| "loss": 0.0, |
| "step": 805500 |
| }, |
| { |
| "epoch": 1.2866633196392554, |
| "grad_norm": 0.000786223856266588, |
| "learning_rate": 8.477624891332226e-06, |
| "loss": 0.0, |
| "step": 806000 |
| }, |
| { |
| "epoch": 1.287461497877245, |
| "grad_norm": 0.00019913198775611818, |
| "learning_rate": 8.460687597877912e-06, |
| "loss": 0.0, |
| "step": 806500 |
| }, |
| { |
| "epoch": 1.2882596761152345, |
| "grad_norm": 0.00020240710000507534, |
| "learning_rate": 8.443760592139325e-06, |
| "loss": 0.0, |
| "step": 807000 |
| }, |
| { |
| "epoch": 1.2890578543532243, |
| "grad_norm": 0.0007126539712771773, |
| "learning_rate": 8.426843900746208e-06, |
| "loss": 0.0, |
| "step": 807500 |
| }, |
| { |
| "epoch": 1.2898560325912138, |
| "grad_norm": 0.00020748093083966523, |
| "learning_rate": 8.409937550312087e-06, |
| "loss": 0.0, |
| "step": 808000 |
| }, |
| { |
| "epoch": 1.2906542108292034, |
| "grad_norm": 0.00014328854740597308, |
| "learning_rate": 8.3930415674342e-06, |
| "loss": 0.0, |
| "step": 808500 |
| }, |
| { |
| "epoch": 1.2914523890671932, |
| "grad_norm": 0.0001533345493953675, |
| "learning_rate": 8.376155978693492e-06, |
| "loss": 0.0, |
| "step": 809000 |
| }, |
| { |
| "epoch": 1.2922505673051827, |
| "grad_norm": 0.00023107643937692046, |
| "learning_rate": 8.359280810654558e-06, |
| "loss": 0.0, |
| "step": 809500 |
| }, |
| { |
| "epoch": 1.2930487455431723, |
| "grad_norm": 0.00015860753774177283, |
| "learning_rate": 8.342416089865576e-06, |
| "loss": 0.0, |
| "step": 810000 |
| }, |
| { |
| "epoch": 1.2938469237811618, |
| "grad_norm": 0.00012862969015259296, |
| "learning_rate": 8.325561842858315e-06, |
| "loss": 0.0, |
| "step": 810500 |
| }, |
| { |
| "epoch": 1.2946451020191514, |
| "grad_norm": 0.0001139972810051404, |
| "learning_rate": 8.308718096148053e-06, |
| "loss": 0.0, |
| "step": 811000 |
| }, |
| { |
| "epoch": 1.2954432802571412, |
| "grad_norm": 0.00019040738698095083, |
| "learning_rate": 8.291884876233555e-06, |
| "loss": 0.0, |
| "step": 811500 |
| }, |
| { |
| "epoch": 1.2962414584951307, |
| "grad_norm": 0.0001693676895229146, |
| "learning_rate": 8.275062209597011e-06, |
| "loss": 0.0, |
| "step": 812000 |
| }, |
| { |
| "epoch": 1.2970396367331203, |
| "grad_norm": 0.0026823594234883785, |
| "learning_rate": 8.258250122704027e-06, |
| "loss": 0.0, |
| "step": 812500 |
| }, |
| { |
| "epoch": 1.29783781497111, |
| "grad_norm": 0.0002031605108641088, |
| "learning_rate": 8.241448642003559e-06, |
| "loss": 0.0, |
| "step": 813000 |
| }, |
| { |
| "epoch": 1.2986359932090996, |
| "grad_norm": 0.0022485863883048296, |
| "learning_rate": 8.224657793927868e-06, |
| "loss": 0.0, |
| "step": 813500 |
| }, |
| { |
| "epoch": 1.2994341714470892, |
| "grad_norm": 0.00010316159023204818, |
| "learning_rate": 8.207877604892493e-06, |
| "loss": 0.0, |
| "step": 814000 |
| }, |
| { |
| "epoch": 1.3002323496850787, |
| "grad_norm": 0.00014087182353250682, |
| "learning_rate": 8.191108101296213e-06, |
| "loss": 0.0, |
| "step": 814500 |
| }, |
| { |
| "epoch": 1.3010305279230683, |
| "grad_norm": 0.00015167437959462404, |
| "learning_rate": 8.17434930952099e-06, |
| "loss": 0.0, |
| "step": 815000 |
| }, |
| { |
| "epoch": 1.301828706161058, |
| "grad_norm": 0.00015953517868183553, |
| "learning_rate": 8.157601255931927e-06, |
| "loss": 0.0, |
| "step": 815500 |
| }, |
| { |
| "epoch": 1.3026268843990476, |
| "grad_norm": 0.00014891130558680743, |
| "learning_rate": 8.140863966877238e-06, |
| "loss": 0.0, |
| "step": 816000 |
| }, |
| { |
| "epoch": 1.3034250626370372, |
| "grad_norm": 0.00016368075739592314, |
| "learning_rate": 8.124137468688216e-06, |
| "loss": 0.0, |
| "step": 816500 |
| }, |
| { |
| "epoch": 1.304223240875027, |
| "grad_norm": 0.00012912409147247672, |
| "learning_rate": 8.10742178767915e-06, |
| "loss": 0.0, |
| "step": 817000 |
| }, |
| { |
| "epoch": 1.3050214191130165, |
| "grad_norm": 0.04891032353043556, |
| "learning_rate": 8.090716950147336e-06, |
| "loss": 0.0, |
| "step": 817500 |
| }, |
| { |
| "epoch": 1.305819597351006, |
| "grad_norm": 0.000139459443744272, |
| "learning_rate": 8.074022982373006e-06, |
| "loss": 0.0, |
| "step": 818000 |
| }, |
| { |
| "epoch": 1.3066177755889956, |
| "grad_norm": 0.00022227551380638033, |
| "learning_rate": 8.057339910619277e-06, |
| "loss": 0.0, |
| "step": 818500 |
| }, |
| { |
| "epoch": 1.3074159538269852, |
| "grad_norm": 0.0002710748231038451, |
| "learning_rate": 8.040667761132143e-06, |
| "loss": 0.0, |
| "step": 819000 |
| }, |
| { |
| "epoch": 1.308214132064975, |
| "grad_norm": 9.039805445354432e-05, |
| "learning_rate": 8.024006560140392e-06, |
| "loss": 0.0, |
| "step": 819500 |
| }, |
| { |
| "epoch": 1.3090123103029645, |
| "grad_norm": 0.009568951092660427, |
| "learning_rate": 8.007356333855626e-06, |
| "loss": 0.0, |
| "step": 820000 |
| }, |
| { |
| "epoch": 1.309810488540954, |
| "grad_norm": 0.00014985322195570916, |
| "learning_rate": 7.990717108472138e-06, |
| "loss": 0.0, |
| "step": 820500 |
| }, |
| { |
| "epoch": 1.3106086667789438, |
| "grad_norm": 0.00019329690258018672, |
| "learning_rate": 7.974088910166944e-06, |
| "loss": 0.0, |
| "step": 821000 |
| }, |
| { |
| "epoch": 1.3114068450169334, |
| "grad_norm": 0.00029314137645997107, |
| "learning_rate": 7.957471765099701e-06, |
| "loss": 0.0, |
| "step": 821500 |
| }, |
| { |
| "epoch": 1.312205023254923, |
| "grad_norm": 0.00020969565957784653, |
| "learning_rate": 7.940865699412673e-06, |
| "loss": 0.0, |
| "step": 822000 |
| }, |
| { |
| "epoch": 1.3130032014929125, |
| "grad_norm": 0.00025969123817048967, |
| "learning_rate": 7.92427073923071e-06, |
| "loss": 0.0, |
| "step": 822500 |
| }, |
| { |
| "epoch": 1.313801379730902, |
| "grad_norm": 0.000144842459121719, |
| "learning_rate": 7.907686910661158e-06, |
| "loss": 0.0, |
| "step": 823000 |
| }, |
| { |
| "epoch": 1.3145995579688918, |
| "grad_norm": 0.00017976704111788422, |
| "learning_rate": 7.8911142397939e-06, |
| "loss": 0.0, |
| "step": 823500 |
| }, |
| { |
| "epoch": 1.3153977362068814, |
| "grad_norm": 0.0001856670278357342, |
| "learning_rate": 7.874552752701218e-06, |
| "loss": 0.0, |
| "step": 824000 |
| }, |
| { |
| "epoch": 1.316195914444871, |
| "grad_norm": 0.0002021729596890509, |
| "learning_rate": 7.858002475437825e-06, |
| "loss": 0.0, |
| "step": 824500 |
| }, |
| { |
| "epoch": 1.3169940926828607, |
| "grad_norm": 0.00013654265785589814, |
| "learning_rate": 7.8414634340408e-06, |
| "loss": 0.0, |
| "step": 825000 |
| }, |
| { |
| "epoch": 1.3177922709208503, |
| "grad_norm": 0.00013490175479091704, |
| "learning_rate": 7.824935654529525e-06, |
| "loss": 0.0, |
| "step": 825500 |
| }, |
| { |
| "epoch": 1.3185904491588398, |
| "grad_norm": 9.905237675411627e-05, |
| "learning_rate": 7.808419162905695e-06, |
| "loss": 0.0, |
| "step": 826000 |
| }, |
| { |
| "epoch": 1.3193886273968296, |
| "grad_norm": 0.00026544061256572604, |
| "learning_rate": 7.791913985153204e-06, |
| "loss": 0.0, |
| "step": 826500 |
| }, |
| { |
| "epoch": 1.3201868056348192, |
| "grad_norm": 0.0002780807844828814, |
| "learning_rate": 7.775420147238204e-06, |
| "loss": 0.0, |
| "step": 827000 |
| }, |
| { |
| "epoch": 1.3209849838728087, |
| "grad_norm": 0.00015520601300522685, |
| "learning_rate": 7.75893767510896e-06, |
| "loss": 0.0, |
| "step": 827500 |
| }, |
| { |
| "epoch": 1.3217831621107983, |
| "grad_norm": 0.00017638910503592342, |
| "learning_rate": 7.74246659469587e-06, |
| "loss": 0.0, |
| "step": 828000 |
| }, |
| { |
| "epoch": 1.3225813403487878, |
| "grad_norm": 0.0001925562391988933, |
| "learning_rate": 7.726006931911415e-06, |
| "loss": 0.0, |
| "step": 828500 |
| }, |
| { |
| "epoch": 1.3233795185867776, |
| "grad_norm": 0.00015528348740190268, |
| "learning_rate": 7.709558712650111e-06, |
| "loss": 0.0, |
| "step": 829000 |
| }, |
| { |
| "epoch": 1.3241776968247672, |
| "grad_norm": 0.004120807629078627, |
| "learning_rate": 7.693121962788482e-06, |
| "loss": 0.0, |
| "step": 829500 |
| }, |
| { |
| "epoch": 1.3249758750627567, |
| "grad_norm": 0.00016603163385298103, |
| "learning_rate": 7.676696708184975e-06, |
| "loss": 0.0, |
| "step": 830000 |
| }, |
| { |
| "epoch": 1.3257740533007465, |
| "grad_norm": 0.00022943579824641347, |
| "learning_rate": 7.66028297468e-06, |
| "loss": 0.0, |
| "step": 830500 |
| }, |
| { |
| "epoch": 1.326572231538736, |
| "grad_norm": 0.005338750313967466, |
| "learning_rate": 7.643880788095805e-06, |
| "loss": 0.0, |
| "step": 831000 |
| }, |
| { |
| "epoch": 1.3273704097767256, |
| "grad_norm": 0.00014208181528374553, |
| "learning_rate": 7.62749017423648e-06, |
| "loss": 0.0, |
| "step": 831500 |
| }, |
| { |
| "epoch": 1.3281685880147152, |
| "grad_norm": 0.00011977060785284266, |
| "learning_rate": 7.611111158887916e-06, |
| "loss": 0.0, |
| "step": 832000 |
| }, |
| { |
| "epoch": 1.3289667662527047, |
| "grad_norm": 0.00016354784020222723, |
| "learning_rate": 7.594743767817755e-06, |
| "loss": 0.0, |
| "step": 832500 |
| }, |
| { |
| "epoch": 1.3297649444906945, |
| "grad_norm": 0.00036256128805689514, |
| "learning_rate": 7.578388026775356e-06, |
| "loss": 0.0, |
| "step": 833000 |
| }, |
| { |
| "epoch": 1.330563122728684, |
| "grad_norm": 9.693684114608914e-05, |
| "learning_rate": 7.5620439614917334e-06, |
| "loss": 0.0, |
| "step": 833500 |
| }, |
| { |
| "epoch": 1.3313613009666736, |
| "grad_norm": 0.00012745718413498253, |
| "learning_rate": 7.54571159767955e-06, |
| "loss": 0.0, |
| "step": 834000 |
| }, |
| { |
| "epoch": 1.3321594792046634, |
| "grad_norm": 0.00015118411101866513, |
| "learning_rate": 7.52939096103306e-06, |
| "loss": 0.0, |
| "step": 834500 |
| }, |
| { |
| "epoch": 1.332957657442653, |
| "grad_norm": 0.0001406726660206914, |
| "learning_rate": 7.5130820772280494e-06, |
| "loss": 0.0, |
| "step": 835000 |
| }, |
| { |
| "epoch": 1.3337558356806425, |
| "grad_norm": 0.00023740965116303414, |
| "learning_rate": 7.496784971921836e-06, |
| "loss": 0.0, |
| "step": 835500 |
| }, |
| { |
| "epoch": 1.334554013918632, |
| "grad_norm": 0.00013875133299734443, |
| "learning_rate": 7.4804996707531974e-06, |
| "loss": 0.0, |
| "step": 836000 |
| }, |
| { |
| "epoch": 1.3353521921566216, |
| "grad_norm": 0.0002137289848178625, |
| "learning_rate": 7.464226199342347e-06, |
| "loss": 0.0, |
| "step": 836500 |
| }, |
| { |
| "epoch": 1.3361503703946114, |
| "grad_norm": 0.00043378453119657934, |
| "learning_rate": 7.4479645832908724e-06, |
| "loss": 0.0, |
| "step": 837000 |
| }, |
| { |
| "epoch": 1.336948548632601, |
| "grad_norm": 0.00016079274064395577, |
| "learning_rate": 7.431714848181727e-06, |
| "loss": 0.0, |
| "step": 837500 |
| }, |
| { |
| "epoch": 1.3377467268705905, |
| "grad_norm": 0.0005202249740250409, |
| "learning_rate": 7.415477019579172e-06, |
| "loss": 0.0, |
| "step": 838000 |
| }, |
| { |
| "epoch": 1.3385449051085803, |
| "grad_norm": 0.000191315877600573, |
| "learning_rate": 7.39925112302872e-06, |
| "loss": 0.0, |
| "step": 838500 |
| }, |
| { |
| "epoch": 1.3393430833465698, |
| "grad_norm": 0.23445342481136322, |
| "learning_rate": 7.383037184057128e-06, |
| "loss": 0.0, |
| "step": 839000 |
| }, |
| { |
| "epoch": 1.3401412615845594, |
| "grad_norm": 0.01140950620174408, |
| "learning_rate": 7.36683522817234e-06, |
| "loss": 0.0, |
| "step": 839500 |
| }, |
| { |
| "epoch": 1.3409394398225492, |
| "grad_norm": 0.00017085122817661613, |
| "learning_rate": 7.35064528086345e-06, |
| "loss": 0.0, |
| "step": 840000 |
| }, |
| { |
| "epoch": 1.3417376180605387, |
| "grad_norm": 0.0003418387204874307, |
| "learning_rate": 7.334467367600643e-06, |
| "loss": 0.0, |
| "step": 840500 |
| }, |
| { |
| "epoch": 1.3425357962985283, |
| "grad_norm": 0.0002189553779317066, |
| "learning_rate": 7.318301513835188e-06, |
| "loss": 0.0, |
| "step": 841000 |
| }, |
| { |
| "epoch": 1.3433339745365178, |
| "grad_norm": 0.00013727162149734795, |
| "learning_rate": 7.3021477449993866e-06, |
| "loss": 0.0, |
| "step": 841500 |
| }, |
| { |
| "epoch": 1.3441321527745074, |
| "grad_norm": 0.00011598570563364774, |
| "learning_rate": 7.2860060865065075e-06, |
| "loss": 0.0, |
| "step": 842000 |
| }, |
| { |
| "epoch": 1.3449303310124971, |
| "grad_norm": 0.0001249085908057168, |
| "learning_rate": 7.269876563750783e-06, |
| "loss": 0.0, |
| "step": 842500 |
| }, |
| { |
| "epoch": 1.3457285092504867, |
| "grad_norm": 350.6066589355469, |
| "learning_rate": 7.253759202107352e-06, |
| "loss": 0.0, |
| "step": 843000 |
| }, |
| { |
| "epoch": 1.3465266874884763, |
| "grad_norm": 0.00022806675406172872, |
| "learning_rate": 7.237654026932224e-06, |
| "loss": 0.0, |
| "step": 843500 |
| }, |
| { |
| "epoch": 1.347324865726466, |
| "grad_norm": 0.0001397529267705977, |
| "learning_rate": 7.221561063562219e-06, |
| "loss": 0.0, |
| "step": 844000 |
| }, |
| { |
| "epoch": 1.3481230439644556, |
| "grad_norm": 0.0014991023344919086, |
| "learning_rate": 7.205480337314964e-06, |
| "loss": 0.0, |
| "step": 844500 |
| }, |
| { |
| "epoch": 1.3489212222024451, |
| "grad_norm": 0.0007024999940767884, |
| "learning_rate": 7.189411873488836e-06, |
| "loss": 0.0, |
| "step": 845000 |
| }, |
| { |
| "epoch": 1.3497194004404347, |
| "grad_norm": 0.00010580118396319449, |
| "learning_rate": 7.173355697362898e-06, |
| "loss": 0.0, |
| "step": 845500 |
| }, |
| { |
| "epoch": 1.3505175786784243, |
| "grad_norm": 0.0036384917329996824, |
| "learning_rate": 7.157311834196908e-06, |
| "loss": 0.0, |
| "step": 846000 |
| }, |
| { |
| "epoch": 1.351315756916414, |
| "grad_norm": 0.0009470462100580335, |
| "learning_rate": 7.141280309231241e-06, |
| "loss": 0.0, |
| "step": 846500 |
| }, |
| { |
| "epoch": 1.3521139351544036, |
| "grad_norm": 0.00014973332872614264, |
| "learning_rate": 7.125261147686855e-06, |
| "loss": 0.0, |
| "step": 847000 |
| }, |
| { |
| "epoch": 1.3529121133923931, |
| "grad_norm": 0.0022423311602324247, |
| "learning_rate": 7.10925437476527e-06, |
| "loss": 0.0, |
| "step": 847500 |
| }, |
| { |
| "epoch": 1.353710291630383, |
| "grad_norm": 0.0001411356934113428, |
| "learning_rate": 7.093260015648512e-06, |
| "loss": 0.0, |
| "step": 848000 |
| }, |
| { |
| "epoch": 1.3545084698683725, |
| "grad_norm": 0.0017032199539244175, |
| "learning_rate": 7.077278095499081e-06, |
| "loss": 0.0, |
| "step": 848500 |
| }, |
| { |
| "epoch": 1.355306648106362, |
| "grad_norm": 0.00015193774015642703, |
| "learning_rate": 7.061308639459893e-06, |
| "loss": 0.0, |
| "step": 849000 |
| }, |
| { |
| "epoch": 1.3561048263443516, |
| "grad_norm": 2119.16162109375, |
| "learning_rate": 7.04535167265427e-06, |
| "loss": 0.0, |
| "step": 849500 |
| }, |
| { |
| "epoch": 1.3569030045823411, |
| "grad_norm": 0.00017610577924642712, |
| "learning_rate": 7.0294072201858885e-06, |
| "loss": 0.0, |
| "step": 850000 |
| }, |
| { |
| "epoch": 1.357701182820331, |
| "grad_norm": 0.000205761069082655, |
| "learning_rate": 7.01347530713872e-06, |
| "loss": 0.0, |
| "step": 850500 |
| }, |
| { |
| "epoch": 1.3584993610583205, |
| "grad_norm": 0.0001279138377867639, |
| "learning_rate": 6.9975559585770245e-06, |
| "loss": 0.0, |
| "step": 851000 |
| }, |
| { |
| "epoch": 1.35929753929631, |
| "grad_norm": 0.00014980623382143676, |
| "learning_rate": 6.981649199545289e-06, |
| "loss": 0.0, |
| "step": 851500 |
| }, |
| { |
| "epoch": 1.3600957175342998, |
| "grad_norm": 0.0001443100773030892, |
| "learning_rate": 6.9657550550682035e-06, |
| "loss": 0.0, |
| "step": 852000 |
| }, |
| { |
| "epoch": 1.3608938957722894, |
| "grad_norm": 0.002811576472595334, |
| "learning_rate": 6.949873550150591e-06, |
| "loss": 0.0, |
| "step": 852500 |
| }, |
| { |
| "epoch": 1.361692074010279, |
| "grad_norm": 0.00020981239504180849, |
| "learning_rate": 6.93400470977741e-06, |
| "loss": 0.0, |
| "step": 853000 |
| }, |
| { |
| "epoch": 1.3624902522482685, |
| "grad_norm": 0.2709546983242035, |
| "learning_rate": 6.918148558913697e-06, |
| "loss": 0.0, |
| "step": 853500 |
| }, |
| { |
| "epoch": 1.363288430486258, |
| "grad_norm": 0.0001471164432587102, |
| "learning_rate": 6.902305122504502e-06, |
| "loss": 0.0, |
| "step": 854000 |
| }, |
| { |
| "epoch": 1.3640866087242478, |
| "grad_norm": 0.00018949166405946016, |
| "learning_rate": 6.886474425474902e-06, |
| "loss": 0.0, |
| "step": 854500 |
| }, |
| { |
| "epoch": 1.3648847869622374, |
| "grad_norm": 0.00011930407345062122, |
| "learning_rate": 6.870656492729898e-06, |
| "loss": 0.0, |
| "step": 855000 |
| }, |
| { |
| "epoch": 1.365682965200227, |
| "grad_norm": 9.36063879635185e-05, |
| "learning_rate": 6.854851349154454e-06, |
| "loss": 0.0, |
| "step": 855500 |
| }, |
| { |
| "epoch": 1.3664811434382167, |
| "grad_norm": 0.24007728695869446, |
| "learning_rate": 6.839059019613378e-06, |
| "loss": 0.0, |
| "step": 856000 |
| }, |
| { |
| "epoch": 1.3672793216762062, |
| "grad_norm": 0.00011204006295884028, |
| "learning_rate": 6.82327952895132e-06, |
| "loss": 0.0, |
| "step": 856500 |
| }, |
| { |
| "epoch": 1.3680774999141958, |
| "grad_norm": 7.074438326526433e-05, |
| "learning_rate": 6.807512901992764e-06, |
| "loss": 0.0, |
| "step": 857000 |
| }, |
| { |
| "epoch": 1.3688756781521856, |
| "grad_norm": 0.000122178447782062, |
| "learning_rate": 6.791759163541918e-06, |
| "loss": 0.0, |
| "step": 857500 |
| }, |
| { |
| "epoch": 1.3696738563901751, |
| "grad_norm": 0.0001083787574316375, |
| "learning_rate": 6.776018338382742e-06, |
| "loss": 0.0, |
| "step": 858000 |
| }, |
| { |
| "epoch": 1.3704720346281647, |
| "grad_norm": 0.00013601896353065968, |
| "learning_rate": 6.760290451278853e-06, |
| "loss": 0.0, |
| "step": 858500 |
| }, |
| { |
| "epoch": 1.3712702128661542, |
| "grad_norm": 0.001532147522084415, |
| "learning_rate": 6.744575526973552e-06, |
| "loss": 0.0, |
| "step": 859000 |
| }, |
| { |
| "epoch": 1.3720683911041438, |
| "grad_norm": 0.00012492662062868476, |
| "learning_rate": 6.728873590189714e-06, |
| "loss": 0.0, |
| "step": 859500 |
| }, |
| { |
| "epoch": 1.3728665693421336, |
| "grad_norm": 0.00010753708920674399, |
| "learning_rate": 6.713184665629786e-06, |
| "loss": 0.0, |
| "step": 860000 |
| }, |
| { |
| "epoch": 1.3736647475801231, |
| "grad_norm": 0.0001573436165926978, |
| "learning_rate": 6.69750877797576e-06, |
| "loss": 0.0, |
| "step": 860500 |
| }, |
| { |
| "epoch": 1.3744629258181127, |
| "grad_norm": 9.552572009852156e-05, |
| "learning_rate": 6.681845951889103e-06, |
| "loss": 0.0, |
| "step": 861000 |
| }, |
| { |
| "epoch": 1.3752611040561025, |
| "grad_norm": 0.00018931551312562078, |
| "learning_rate": 6.66619621201075e-06, |
| "loss": 0.0, |
| "step": 861500 |
| }, |
| { |
| "epoch": 1.376059282294092, |
| "grad_norm": 0.0004058021877426654, |
| "learning_rate": 6.650559582961019e-06, |
| "loss": 0.0, |
| "step": 862000 |
| }, |
| { |
| "epoch": 1.3768574605320816, |
| "grad_norm": 0.0001997623621718958, |
| "learning_rate": 6.634936089339643e-06, |
| "loss": 0.0, |
| "step": 862500 |
| }, |
| { |
| "epoch": 1.3776556387700711, |
| "grad_norm": 0.0004893583245575428, |
| "learning_rate": 6.619325755725658e-06, |
| "loss": 0.0, |
| "step": 863000 |
| }, |
| { |
| "epoch": 1.3784538170080607, |
| "grad_norm": 0.00013030781701672822, |
| "learning_rate": 6.603728606677401e-06, |
| "loss": 0.0, |
| "step": 863500 |
| }, |
| { |
| "epoch": 1.3792519952460505, |
| "grad_norm": 0.00012525140482466668, |
| "learning_rate": 6.588144666732477e-06, |
| "loss": 0.0, |
| "step": 864000 |
| }, |
| { |
| "epoch": 1.38005017348404, |
| "grad_norm": 0.00019924509979318827, |
| "learning_rate": 6.572573960407707e-06, |
| "loss": 0.0, |
| "step": 864500 |
| }, |
| { |
| "epoch": 1.3808483517220296, |
| "grad_norm": 0.0001296091650146991, |
| "learning_rate": 6.557016512199096e-06, |
| "loss": 0.0, |
| "step": 865000 |
| }, |
| { |
| "epoch": 1.3816465299600194, |
| "grad_norm": 0.00019620211969595402, |
| "learning_rate": 6.541472346581777e-06, |
| "loss": 0.0, |
| "step": 865500 |
| }, |
| { |
| "epoch": 1.382444708198009, |
| "grad_norm": 0.00017245823983103037, |
| "learning_rate": 6.525941488010001e-06, |
| "loss": 0.0, |
| "step": 866000 |
| }, |
| { |
| "epoch": 1.3832428864359985, |
| "grad_norm": 0.00019742832228075713, |
| "learning_rate": 6.510423960917086e-06, |
| "loss": 0.0, |
| "step": 866500 |
| }, |
| { |
| "epoch": 1.384041064673988, |
| "grad_norm": 0.0001866811653599143, |
| "learning_rate": 6.494919789715358e-06, |
| "loss": 0.0, |
| "step": 867000 |
| }, |
| { |
| "epoch": 1.3848392429119776, |
| "grad_norm": 0.003743622684851289, |
| "learning_rate": 6.479428998796151e-06, |
| "loss": 0.0, |
| "step": 867500 |
| }, |
| { |
| "epoch": 1.3856374211499674, |
| "grad_norm": 0.00019897932361345738, |
| "learning_rate": 6.463951612529742e-06, |
| "loss": 0.0, |
| "step": 868000 |
| }, |
| { |
| "epoch": 1.386435599387957, |
| "grad_norm": 0.00018085265764966607, |
| "learning_rate": 6.448487655265323e-06, |
| "loss": 0.0, |
| "step": 868500 |
| }, |
| { |
| "epoch": 1.3872337776259465, |
| "grad_norm": 0.00022326891485136002, |
| "learning_rate": 6.433037151330946e-06, |
| "loss": 0.0, |
| "step": 869000 |
| }, |
| { |
| "epoch": 1.3880319558639362, |
| "grad_norm": 0.0009524719207547605, |
| "learning_rate": 6.417600125033513e-06, |
| "loss": 0.0, |
| "step": 869500 |
| }, |
| { |
| "epoch": 1.3888301341019258, |
| "grad_norm": 0.00014634850958827883, |
| "learning_rate": 6.402176600658723e-06, |
| "loss": 0.0, |
| "step": 870000 |
| }, |
| { |
| "epoch": 1.3896283123399154, |
| "grad_norm": 0.00020068578305654228, |
| "learning_rate": 6.386766602471019e-06, |
| "loss": 0.0, |
| "step": 870500 |
| }, |
| { |
| "epoch": 1.3904264905779051, |
| "grad_norm": 0.00010230097541352734, |
| "learning_rate": 6.371370154713577e-06, |
| "loss": 0.0, |
| "step": 871000 |
| }, |
| { |
| "epoch": 1.3912246688158947, |
| "grad_norm": 0.00012358248932287097, |
| "learning_rate": 6.355987281608255e-06, |
| "loss": 0.0, |
| "step": 871500 |
| }, |
| { |
| "epoch": 1.3920228470538842, |
| "grad_norm": 0.00010632740304572508, |
| "learning_rate": 6.340618007355554e-06, |
| "loss": 0.0, |
| "step": 872000 |
| }, |
| { |
| "epoch": 1.3928210252918738, |
| "grad_norm": 9.845475142356008e-05, |
| "learning_rate": 6.325262356134572e-06, |
| "loss": 0.0, |
| "step": 872500 |
| }, |
| { |
| "epoch": 1.3936192035298633, |
| "grad_norm": 156.6703338623047, |
| "learning_rate": 6.309920352102985e-06, |
| "loss": 0.0, |
| "step": 873000 |
| }, |
| { |
| "epoch": 1.3944173817678531, |
| "grad_norm": 0.0001712299999780953, |
| "learning_rate": 6.294592019397005e-06, |
| "loss": 0.0, |
| "step": 873500 |
| }, |
| { |
| "epoch": 1.3952155600058427, |
| "grad_norm": 0.00036461843410506845, |
| "learning_rate": 6.279277382131317e-06, |
| "loss": 0.0, |
| "step": 874000 |
| }, |
| { |
| "epoch": 1.3960137382438322, |
| "grad_norm": 0.0001327757054241374, |
| "learning_rate": 6.2639764643990735e-06, |
| "loss": 0.0, |
| "step": 874500 |
| }, |
| { |
| "epoch": 1.396811916481822, |
| "grad_norm": 0.0001479165512137115, |
| "learning_rate": 6.248689290271848e-06, |
| "loss": 0.0, |
| "step": 875000 |
| }, |
| { |
| "epoch": 1.3976100947198116, |
| "grad_norm": 8.516235538991168e-05, |
| "learning_rate": 6.233415883799577e-06, |
| "loss": 0.0, |
| "step": 875500 |
| }, |
| { |
| "epoch": 1.3984082729578011, |
| "grad_norm": 0.00011310012632748112, |
| "learning_rate": 6.218156269010544e-06, |
| "loss": 0.0, |
| "step": 876000 |
| }, |
| { |
| "epoch": 1.3992064511957907, |
| "grad_norm": 0.00013644126011058688, |
| "learning_rate": 6.202910469911346e-06, |
| "loss": 0.0, |
| "step": 876500 |
| }, |
| { |
| "epoch": 1.4000046294337802, |
| "grad_norm": 0.0001388086675433442, |
| "learning_rate": 6.187678510486834e-06, |
| "loss": 0.0, |
| "step": 877000 |
| }, |
| { |
| "epoch": 1.40080280767177, |
| "grad_norm": 0.00012577083543874323, |
| "learning_rate": 6.172460414700082e-06, |
| "loss": 0.0, |
| "step": 877500 |
| }, |
| { |
| "epoch": 1.4016009859097596, |
| "grad_norm": 0.0001132668912759982, |
| "learning_rate": 6.157256206492363e-06, |
| "loss": 0.0, |
| "step": 878000 |
| }, |
| { |
| "epoch": 1.4023991641477491, |
| "grad_norm": 0.00019735400564968586, |
| "learning_rate": 6.1420659097831064e-06, |
| "loss": 0.0, |
| "step": 878500 |
| }, |
| { |
| "epoch": 1.403197342385739, |
| "grad_norm": 0.00015007038018666208, |
| "learning_rate": 6.126889548469834e-06, |
| "loss": 0.0, |
| "step": 879000 |
| }, |
| { |
| "epoch": 1.4039955206237285, |
| "grad_norm": 0.00014871565508656204, |
| "learning_rate": 6.111727146428168e-06, |
| "loss": 0.0, |
| "step": 879500 |
| }, |
| { |
| "epoch": 1.404793698861718, |
| "grad_norm": 0.0001545920968055725, |
| "learning_rate": 6.096578727511758e-06, |
| "loss": 0.0, |
| "step": 880000 |
| }, |
| { |
| "epoch": 1.4055918770997076, |
| "grad_norm": 0.009757994674146175, |
| "learning_rate": 6.081444315552264e-06, |
| "loss": 0.0, |
| "step": 880500 |
| }, |
| { |
| "epoch": 1.4063900553376971, |
| "grad_norm": 0.00010370996460551396, |
| "learning_rate": 6.066323934359293e-06, |
| "loss": 0.0, |
| "step": 881000 |
| }, |
| { |
| "epoch": 1.407188233575687, |
| "grad_norm": 0.0002125945466104895, |
| "learning_rate": 6.051217607720393e-06, |
| "loss": 0.0, |
| "step": 881500 |
| }, |
| { |
| "epoch": 1.4079864118136765, |
| "grad_norm": 0.0001540460652904585, |
| "learning_rate": 6.036125359401005e-06, |
| "loss": 0.0, |
| "step": 882000 |
| }, |
| { |
| "epoch": 1.408784590051666, |
| "grad_norm": 0.016482815146446228, |
| "learning_rate": 6.0210472131444e-06, |
| "loss": 0.0, |
| "step": 882500 |
| }, |
| { |
| "epoch": 1.4095827682896558, |
| "grad_norm": 0.0001845559454523027, |
| "learning_rate": 6.005983192671689e-06, |
| "loss": 0.0, |
| "step": 883000 |
| }, |
| { |
| "epoch": 1.4103809465276453, |
| "grad_norm": 0.00020905568089801818, |
| "learning_rate": 5.990933321681743e-06, |
| "loss": 0.0, |
| "step": 883500 |
| }, |
| { |
| "epoch": 1.411179124765635, |
| "grad_norm": 0.00013950113498140126, |
| "learning_rate": 5.9758976238511895e-06, |
| "loss": 0.0, |
| "step": 884000 |
| }, |
| { |
| "epoch": 1.4119773030036245, |
| "grad_norm": 0.00010148427099920809, |
| "learning_rate": 5.960876122834338e-06, |
| "loss": 0.0, |
| "step": 884500 |
| }, |
| { |
| "epoch": 1.412775481241614, |
| "grad_norm": 0.00019023822096642107, |
| "learning_rate": 5.945868842263167e-06, |
| "loss": 0.0, |
| "step": 885000 |
| }, |
| { |
| "epoch": 1.4135736594796038, |
| "grad_norm": 0.0007381364703178406, |
| "learning_rate": 5.930875805747308e-06, |
| "loss": 0.0, |
| "step": 885500 |
| }, |
| { |
| "epoch": 1.4143718377175933, |
| "grad_norm": 0.00023581883579026908, |
| "learning_rate": 5.915897036873949e-06, |
| "loss": 0.0, |
| "step": 886000 |
| }, |
| { |
| "epoch": 1.415170015955583, |
| "grad_norm": 0.00018272988381795585, |
| "learning_rate": 5.900932559207857e-06, |
| "loss": 0.0, |
| "step": 886500 |
| }, |
| { |
| "epoch": 1.4159681941935727, |
| "grad_norm": 0.00017383633530698717, |
| "learning_rate": 5.885982396291304e-06, |
| "loss": 0.0, |
| "step": 887000 |
| }, |
| { |
| "epoch": 1.4167663724315622, |
| "grad_norm": 0.00023192846856545657, |
| "learning_rate": 5.871046571644052e-06, |
| "loss": 0.0, |
| "step": 887500 |
| }, |
| { |
| "epoch": 1.4175645506695518, |
| "grad_norm": 0.00807888526469469, |
| "learning_rate": 5.8561251087632925e-06, |
| "loss": 0.0, |
| "step": 888000 |
| }, |
| { |
| "epoch": 1.4183627289075416, |
| "grad_norm": 0.00855404045432806, |
| "learning_rate": 5.84121803112362e-06, |
| "loss": 0.0, |
| "step": 888500 |
| }, |
| { |
| "epoch": 1.4191609071455311, |
| "grad_norm": 0.0001990912714973092, |
| "learning_rate": 5.826325362177028e-06, |
| "loss": 0.0, |
| "step": 889000 |
| }, |
| { |
| "epoch": 1.4199590853835207, |
| "grad_norm": 0.00013648335880134255, |
| "learning_rate": 5.811447125352806e-06, |
| "loss": 0.0, |
| "step": 889500 |
| }, |
| { |
| "epoch": 1.4207572636215102, |
| "grad_norm": 0.0002360754006076604, |
| "learning_rate": 5.796583344057563e-06, |
| "loss": 0.0, |
| "step": 890000 |
| }, |
| { |
| "epoch": 1.4215554418594998, |
| "grad_norm": 0.00024741183733567595, |
| "learning_rate": 5.781734041675143e-06, |
| "loss": 0.0, |
| "step": 890500 |
| }, |
| { |
| "epoch": 1.4223536200974896, |
| "grad_norm": 0.0004946837434545159, |
| "learning_rate": 5.76689924156665e-06, |
| "loss": 0.0, |
| "step": 891000 |
| }, |
| { |
| "epoch": 1.4231517983354791, |
| "grad_norm": 0.00025075749726966023, |
| "learning_rate": 5.752078967070334e-06, |
| "loss": 0.0, |
| "step": 891500 |
| }, |
| { |
| "epoch": 1.4239499765734687, |
| "grad_norm": 0.00029719286249019206, |
| "learning_rate": 5.737273241501599e-06, |
| "loss": 0.0, |
| "step": 892000 |
| }, |
| { |
| "epoch": 1.4247481548114584, |
| "grad_norm": 0.0014194652903825045, |
| "learning_rate": 5.722482088152992e-06, |
| "loss": 0.0, |
| "step": 892500 |
| }, |
| { |
| "epoch": 1.425546333049448, |
| "grad_norm": 0.00022001670731697232, |
| "learning_rate": 5.7077055302940966e-06, |
| "loss": 0.0, |
| "step": 893000 |
| }, |
| { |
| "epoch": 1.4263445112874376, |
| "grad_norm": 0.00024666590616106987, |
| "learning_rate": 5.692943591171561e-06, |
| "loss": 0.0, |
| "step": 893500 |
| }, |
| { |
| "epoch": 1.427142689525427, |
| "grad_norm": 0.0002910353650804609, |
| "learning_rate": 5.6781962940090146e-06, |
| "loss": 0.0, |
| "step": 894000 |
| }, |
| { |
| "epoch": 1.4279408677634167, |
| "grad_norm": 2379.8115234375, |
| "learning_rate": 5.663463662007065e-06, |
| "loss": 0.0, |
| "step": 894500 |
| }, |
| { |
| "epoch": 1.4287390460014064, |
| "grad_norm": 0.00021346789435483515, |
| "learning_rate": 5.6487457183432515e-06, |
| "loss": 0.0, |
| "step": 895000 |
| }, |
| { |
| "epoch": 1.429537224239396, |
| "grad_norm": 0.00022312205692287534, |
| "learning_rate": 5.634042486171992e-06, |
| "loss": 0.0, |
| "step": 895500 |
| }, |
| { |
| "epoch": 1.4303354024773856, |
| "grad_norm": 0.0034749663900583982, |
| "learning_rate": 5.6193539886245685e-06, |
| "loss": 0.0, |
| "step": 896000 |
| }, |
| { |
| "epoch": 1.4311335807153753, |
| "grad_norm": 0.0002445397840347141, |
| "learning_rate": 5.604680248809083e-06, |
| "loss": 0.0, |
| "step": 896500 |
| }, |
| { |
| "epoch": 1.4319317589533649, |
| "grad_norm": 0.00021094012481626123, |
| "learning_rate": 5.590021289810424e-06, |
| "loss": 0.0, |
| "step": 897000 |
| }, |
| { |
| "epoch": 1.4327299371913544, |
| "grad_norm": 0.0002627313369885087, |
| "learning_rate": 5.575377134690211e-06, |
| "loss": 0.0, |
| "step": 897500 |
| }, |
| { |
| "epoch": 1.433528115429344, |
| "grad_norm": 0.0008308569085784256, |
| "learning_rate": 5.56074780648679e-06, |
| "loss": 0.0, |
| "step": 898000 |
| }, |
| { |
| "epoch": 1.4343262936673336, |
| "grad_norm": 0.0001924873940879479, |
| "learning_rate": 5.54613332821518e-06, |
| "loss": 0.0, |
| "step": 898500 |
| }, |
| { |
| "epoch": 1.4351244719053233, |
| "grad_norm": 0.0002441892575006932, |
| "learning_rate": 5.531533722867024e-06, |
| "loss": 0.0, |
| "step": 899000 |
| }, |
| { |
| "epoch": 1.4359226501433129, |
| "grad_norm": 0.0004263210576027632, |
| "learning_rate": 5.51694901341058e-06, |
| "loss": 0.0, |
| "step": 899500 |
| }, |
| { |
| "epoch": 1.4367208283813024, |
| "grad_norm": 0.0001925066317198798, |
| "learning_rate": 5.50237922279067e-06, |
| "loss": 0.0, |
| "step": 900000 |
| }, |
| { |
| "epoch": 1.4367208283813024, |
| "eval_loss": 1.5834675650694408e-05, |
| "eval_runtime": 25429.0967, |
| "eval_samples_per_second": 87.588, |
| "eval_steps_per_second": 2.737, |
| "step": 900000 |
| }, |
| { |
| "epoch": 1.4375190066192922, |
| "grad_norm": 0.00017933818162418902, |
| "learning_rate": 5.487824373928646e-06, |
| "loss": 0.0, |
| "step": 900500 |
| }, |
| { |
| "epoch": 1.4383171848572818, |
| "grad_norm": 0.00016729129129089415, |
| "learning_rate": 5.473284489722342e-06, |
| "loss": 0.0, |
| "step": 901000 |
| }, |
| { |
| "epoch": 1.4391153630952713, |
| "grad_norm": 0.00021027770708315074, |
| "learning_rate": 5.458759593046065e-06, |
| "loss": 0.0, |
| "step": 901500 |
| }, |
| { |
| "epoch": 1.4399135413332609, |
| "grad_norm": 0.00024220098566729575, |
| "learning_rate": 5.444249706750537e-06, |
| "loss": 0.0, |
| "step": 902000 |
| }, |
| { |
| "epoch": 1.4407117195712507, |
| "grad_norm": 0.0009675936307758093, |
| "learning_rate": 5.42975485366286e-06, |
| "loss": 0.0, |
| "step": 902500 |
| }, |
| { |
| "epoch": 1.4415098978092402, |
| "grad_norm": 0.002770686289295554, |
| "learning_rate": 5.415275056586492e-06, |
| "loss": 0.0, |
| "step": 903000 |
| }, |
| { |
| "epoch": 1.4423080760472298, |
| "grad_norm": 0.00026132797938771546, |
| "learning_rate": 5.400810338301212e-06, |
| "loss": 0.0, |
| "step": 903500 |
| }, |
| { |
| "epoch": 1.4431062542852193, |
| "grad_norm": 0.0002459358365740627, |
| "learning_rate": 5.386360721563056e-06, |
| "loss": 0.0, |
| "step": 904000 |
| }, |
| { |
| "epoch": 1.443904432523209, |
| "grad_norm": 0.0004831771075259894, |
| "learning_rate": 5.371926229104321e-06, |
| "loss": 0.0, |
| "step": 904500 |
| }, |
| { |
| "epoch": 1.4447026107611987, |
| "grad_norm": 0.0001880936761153862, |
| "learning_rate": 5.357506883633503e-06, |
| "loss": 0.0, |
| "step": 905000 |
| }, |
| { |
| "epoch": 1.4455007889991882, |
| "grad_norm": 0.00023097256780602038, |
| "learning_rate": 5.343102707835275e-06, |
| "loss": 0.0, |
| "step": 905500 |
| }, |
| { |
| "epoch": 1.446298967237178, |
| "grad_norm": 0.00031300674891099334, |
| "learning_rate": 5.328713724370429e-06, |
| "loss": 0.0, |
| "step": 906000 |
| }, |
| { |
| "epoch": 1.4470971454751675, |
| "grad_norm": 0.0001413863938068971, |
| "learning_rate": 5.314339955875871e-06, |
| "loss": 0.0, |
| "step": 906500 |
| }, |
| { |
| "epoch": 1.447895323713157, |
| "grad_norm": 0.00023202685406431556, |
| "learning_rate": 5.299981424964573e-06, |
| "loss": 0.0, |
| "step": 907000 |
| }, |
| { |
| "epoch": 1.4486935019511467, |
| "grad_norm": 0.023249566555023193, |
| "learning_rate": 5.285638154225516e-06, |
| "loss": 0.0, |
| "step": 907500 |
| }, |
| { |
| "epoch": 1.4494916801891362, |
| "grad_norm": 0.00018502937746234238, |
| "learning_rate": 5.271310166223693e-06, |
| "loss": 0.0, |
| "step": 908000 |
| }, |
| { |
| "epoch": 1.450289858427126, |
| "grad_norm": 0.0005356152541935444, |
| "learning_rate": 5.256997483500046e-06, |
| "loss": 0.0, |
| "step": 908500 |
| }, |
| { |
| "epoch": 1.4510880366651155, |
| "grad_norm": 0.0037611278239637613, |
| "learning_rate": 5.242700128571443e-06, |
| "loss": 0.0, |
| "step": 909000 |
| }, |
| { |
| "epoch": 1.451886214903105, |
| "grad_norm": 0.0002823321265168488, |
| "learning_rate": 5.2284181239306296e-06, |
| "loss": 0.0, |
| "step": 909500 |
| }, |
| { |
| "epoch": 1.4526843931410949, |
| "grad_norm": 0.0002303695655427873, |
| "learning_rate": 5.214151492046206e-06, |
| "loss": 0.0, |
| "step": 910000 |
| }, |
| { |
| "epoch": 1.4534825713790844, |
| "grad_norm": 0.00018809006724040955, |
| "learning_rate": 5.199900255362598e-06, |
| "loss": 0.0, |
| "step": 910500 |
| }, |
| { |
| "epoch": 1.454280749617074, |
| "grad_norm": 0.0005356586189009249, |
| "learning_rate": 5.18566443629999e-06, |
| "loss": 0.0, |
| "step": 911000 |
| }, |
| { |
| "epoch": 1.4550789278550635, |
| "grad_norm": 0.00024188545648939908, |
| "learning_rate": 5.171444057254331e-06, |
| "loss": 0.0, |
| "step": 911500 |
| }, |
| { |
| "epoch": 1.455877106093053, |
| "grad_norm": 0.0004672040231525898, |
| "learning_rate": 5.15723914059727e-06, |
| "loss": 0.0, |
| "step": 912000 |
| }, |
| { |
| "epoch": 1.4566752843310429, |
| "grad_norm": 0.00026381740462966263, |
| "learning_rate": 5.14304970867614e-06, |
| "loss": 0.0, |
| "step": 912500 |
| }, |
| { |
| "epoch": 1.4574734625690324, |
| "grad_norm": 0.00014812721929047257, |
| "learning_rate": 5.1288757838138945e-06, |
| "loss": 0.0, |
| "step": 913000 |
| }, |
| { |
| "epoch": 1.458271640807022, |
| "grad_norm": 0.0001770323870005086, |
| "learning_rate": 5.114717388309109e-06, |
| "loss": 0.0, |
| "step": 913500 |
| }, |
| { |
| "epoch": 1.4590698190450118, |
| "grad_norm": 0.00019775221880991012, |
| "learning_rate": 5.100574544435927e-06, |
| "loss": 0.0, |
| "step": 914000 |
| }, |
| { |
| "epoch": 1.4598679972830013, |
| "grad_norm": 0.00034710581530816853, |
| "learning_rate": 5.086447274444008e-06, |
| "loss": 0.0, |
| "step": 914500 |
| }, |
| { |
| "epoch": 1.4606661755209909, |
| "grad_norm": 0.00025523340445943177, |
| "learning_rate": 5.072335600558529e-06, |
| "loss": 0.0, |
| "step": 915000 |
| }, |
| { |
| "epoch": 1.4614643537589804, |
| "grad_norm": 0.00015108758816495538, |
| "learning_rate": 5.058239544980128e-06, |
| "loss": 0.0, |
| "step": 915500 |
| }, |
| { |
| "epoch": 1.46226253199697, |
| "grad_norm": 0.00018012001237366349, |
| "learning_rate": 5.044159129884872e-06, |
| "loss": 0.0, |
| "step": 916000 |
| }, |
| { |
| "epoch": 1.4630607102349598, |
| "grad_norm": 0.0001702363369986415, |
| "learning_rate": 5.030094377424215e-06, |
| "loss": 0.0, |
| "step": 916500 |
| }, |
| { |
| "epoch": 1.4638588884729493, |
| "grad_norm": 0.00013683621364179999, |
| "learning_rate": 5.016045309724964e-06, |
| "loss": 0.0, |
| "step": 917000 |
| }, |
| { |
| "epoch": 1.4646570667109389, |
| "grad_norm": 0.0001706535113044083, |
| "learning_rate": 5.002011948889283e-06, |
| "loss": 0.0, |
| "step": 917500 |
| }, |
| { |
| "epoch": 1.4654552449489286, |
| "grad_norm": 0.0001231006026500836, |
| "learning_rate": 4.987994316994589e-06, |
| "loss": 0.0, |
| "step": 918000 |
| }, |
| { |
| "epoch": 1.4662534231869182, |
| "grad_norm": 0.00028214300982654095, |
| "learning_rate": 4.973992436093574e-06, |
| "loss": 0.0, |
| "step": 918500 |
| }, |
| { |
| "epoch": 1.4670516014249078, |
| "grad_norm": 0.00048577613779343665, |
| "learning_rate": 4.960006328214146e-06, |
| "loss": 0.0, |
| "step": 919000 |
| }, |
| { |
| "epoch": 1.4678497796628975, |
| "grad_norm": 0.0019849666859954596, |
| "learning_rate": 4.946036015359403e-06, |
| "loss": 0.0, |
| "step": 919500 |
| }, |
| { |
| "epoch": 1.468647957900887, |
| "grad_norm": 0.0016200316604226828, |
| "learning_rate": 4.9320815195075855e-06, |
| "loss": 0.0, |
| "step": 920000 |
| }, |
| { |
| "epoch": 1.4694461361388766, |
| "grad_norm": 0.0004031170974485576, |
| "learning_rate": 4.918142862612043e-06, |
| "loss": 0.0, |
| "step": 920500 |
| }, |
| { |
| "epoch": 1.4702443143768662, |
| "grad_norm": 0.00026207268820144236, |
| "learning_rate": 4.904220066601242e-06, |
| "loss": 0.0, |
| "step": 921000 |
| }, |
| { |
| "epoch": 1.4710424926148558, |
| "grad_norm": 0.00015835427620913833, |
| "learning_rate": 4.890313153378654e-06, |
| "loss": 0.0, |
| "step": 921500 |
| }, |
| { |
| "epoch": 1.4718406708528455, |
| "grad_norm": 0.0002738400362432003, |
| "learning_rate": 4.8764221448227946e-06, |
| "loss": 0.0, |
| "step": 922000 |
| }, |
| { |
| "epoch": 1.472638849090835, |
| "grad_norm": 0.00012282826355658472, |
| "learning_rate": 4.86254706278713e-06, |
| "loss": 0.0, |
| "step": 922500 |
| }, |
| { |
| "epoch": 1.4734370273288246, |
| "grad_norm": 0.001346366130746901, |
| "learning_rate": 4.848687929100107e-06, |
| "loss": 0.0, |
| "step": 923000 |
| }, |
| { |
| "epoch": 1.4742352055668144, |
| "grad_norm": 0.00027137529104948044, |
| "learning_rate": 4.834844765565053e-06, |
| "loss": 0.0, |
| "step": 923500 |
| }, |
| { |
| "epoch": 1.475033383804804, |
| "grad_norm": 0.00016544785466976464, |
| "learning_rate": 4.821017593960169e-06, |
| "loss": 0.0, |
| "step": 924000 |
| }, |
| { |
| "epoch": 1.4758315620427935, |
| "grad_norm": 0.0002261428744532168, |
| "learning_rate": 4.8072064360385285e-06, |
| "loss": 0.0, |
| "step": 924500 |
| }, |
| { |
| "epoch": 1.476629740280783, |
| "grad_norm": 0.00015417086251545697, |
| "learning_rate": 4.7934113135279755e-06, |
| "loss": 0.0, |
| "step": 925000 |
| }, |
| { |
| "epoch": 1.4774279185187726, |
| "grad_norm": 0.00016388327640015632, |
| "learning_rate": 4.779632248131156e-06, |
| "loss": 0.0, |
| "step": 925500 |
| }, |
| { |
| "epoch": 1.4782260967567624, |
| "grad_norm": 0.00015743187395855784, |
| "learning_rate": 4.76586926152543e-06, |
| "loss": 0.0, |
| "step": 926000 |
| }, |
| { |
| "epoch": 1.479024274994752, |
| "grad_norm": 0.00023688154760748148, |
| "learning_rate": 4.752122375362879e-06, |
| "loss": 0.0, |
| "step": 926500 |
| }, |
| { |
| "epoch": 1.4798224532327415, |
| "grad_norm": 0.00017375938477925956, |
| "learning_rate": 4.7383916112702564e-06, |
| "loss": 0.0, |
| "step": 927000 |
| }, |
| { |
| "epoch": 1.4806206314707313, |
| "grad_norm": 0.00015891625662334263, |
| "learning_rate": 4.724676990848932e-06, |
| "loss": 0.0, |
| "step": 927500 |
| }, |
| { |
| "epoch": 1.4814188097087209, |
| "grad_norm": 0.00025687378365546465, |
| "learning_rate": 4.710978535674908e-06, |
| "loss": 0.0, |
| "step": 928000 |
| }, |
| { |
| "epoch": 1.4822169879467104, |
| "grad_norm": 0.010246257297694683, |
| "learning_rate": 4.69729626729873e-06, |
| "loss": 0.0, |
| "step": 928500 |
| }, |
| { |
| "epoch": 1.4830151661847, |
| "grad_norm": 0.0002825473784469068, |
| "learning_rate": 4.683630207245494e-06, |
| "loss": 0.0, |
| "step": 929000 |
| }, |
| { |
| "epoch": 1.4838133444226895, |
| "grad_norm": 0.00028282523271627724, |
| "learning_rate": 4.669980377014784e-06, |
| "loss": 0.0, |
| "step": 929500 |
| }, |
| { |
| "epoch": 1.4846115226606793, |
| "grad_norm": 0.00032856714096851647, |
| "learning_rate": 4.65634679808066e-06, |
| "loss": 0.0, |
| "step": 930000 |
| }, |
| { |
| "epoch": 1.4854097008986689, |
| "grad_norm": 0.00027473040972836316, |
| "learning_rate": 4.642729491891618e-06, |
| "loss": 0.0, |
| "step": 930500 |
| }, |
| { |
| "epoch": 1.4862078791366584, |
| "grad_norm": 0.0007247019093483686, |
| "learning_rate": 4.629128479870542e-06, |
| "loss": 0.0, |
| "step": 931000 |
| }, |
| { |
| "epoch": 1.4870060573746482, |
| "grad_norm": 0.0002671232505235821, |
| "learning_rate": 4.6155437834146915e-06, |
| "loss": 0.0, |
| "step": 931500 |
| }, |
| { |
| "epoch": 1.4878042356126377, |
| "grad_norm": 0.00021676292817573994, |
| "learning_rate": 4.601975423895655e-06, |
| "loss": 0.0, |
| "step": 932000 |
| }, |
| { |
| "epoch": 1.4886024138506273, |
| "grad_norm": 0.00023404511739499867, |
| "learning_rate": 4.588423422659326e-06, |
| "loss": 0.0, |
| "step": 932500 |
| }, |
| { |
| "epoch": 1.4894005920886169, |
| "grad_norm": 0.0002494192449375987, |
| "learning_rate": 4.574887801025846e-06, |
| "loss": 0.0, |
| "step": 933000 |
| }, |
| { |
| "epoch": 1.4901987703266064, |
| "grad_norm": 0.00022211413306649774, |
| "learning_rate": 4.561368580289603e-06, |
| "loss": 0.0, |
| "step": 933500 |
| }, |
| { |
| "epoch": 1.4909969485645962, |
| "grad_norm": 0.00018211059796158224, |
| "learning_rate": 4.547865781719185e-06, |
| "loss": 0.0, |
| "step": 934000 |
| }, |
| { |
| "epoch": 1.4917951268025857, |
| "grad_norm": 0.00020940111426170915, |
| "learning_rate": 4.534379426557327e-06, |
| "loss": 0.0, |
| "step": 934500 |
| }, |
| { |
| "epoch": 1.4925933050405753, |
| "grad_norm": 0.00019367334607522935, |
| "learning_rate": 4.520909536020911e-06, |
| "loss": 0.0, |
| "step": 935000 |
| }, |
| { |
| "epoch": 1.493391483278565, |
| "grad_norm": 0.0001935142936417833, |
| "learning_rate": 4.5074561313009185e-06, |
| "loss": 0.0, |
| "step": 935500 |
| }, |
| { |
| "epoch": 1.4941896615165546, |
| "grad_norm": 0.00019816748681478202, |
| "learning_rate": 4.494019233562378e-06, |
| "loss": 0.0, |
| "step": 936000 |
| }, |
| { |
| "epoch": 1.4949878397545442, |
| "grad_norm": 0.00024318444775417447, |
| "learning_rate": 4.480598863944364e-06, |
| "loss": 0.0, |
| "step": 936500 |
| }, |
| { |
| "epoch": 1.495786017992534, |
| "grad_norm": 0.0001876988826552406, |
| "learning_rate": 4.467195043559946e-06, |
| "loss": 0.0, |
| "step": 937000 |
| }, |
| { |
| "epoch": 1.4965841962305235, |
| "grad_norm": 0.0003371692728251219, |
| "learning_rate": 4.453807793496158e-06, |
| "loss": 0.0, |
| "step": 937500 |
| }, |
| { |
| "epoch": 1.497382374468513, |
| "grad_norm": 0.0003051517123822123, |
| "learning_rate": 4.440437134813959e-06, |
| "loss": 0.0, |
| "step": 938000 |
| }, |
| { |
| "epoch": 1.4981805527065026, |
| "grad_norm": 0.0002651779795996845, |
| "learning_rate": 4.427083088548214e-06, |
| "loss": 0.0, |
| "step": 938500 |
| }, |
| { |
| "epoch": 1.4989787309444922, |
| "grad_norm": 0.004931437782943249, |
| "learning_rate": 4.413745675707652e-06, |
| "loss": 0.0, |
| "step": 939000 |
| }, |
| { |
| "epoch": 1.499776909182482, |
| "grad_norm": 0.00023486409918405116, |
| "learning_rate": 4.40042491727483e-06, |
| "loss": 0.0, |
| "step": 939500 |
| }, |
| { |
| "epoch": 1.5005750874204715, |
| "grad_norm": 0.0002614731201902032, |
| "learning_rate": 4.387120834206106e-06, |
| "loss": 0.0, |
| "step": 940000 |
| }, |
| { |
| "epoch": 1.501373265658461, |
| "grad_norm": 0.0002206834906246513, |
| "learning_rate": 4.373833447431606e-06, |
| "loss": 0.0, |
| "step": 940500 |
| }, |
| { |
| "epoch": 1.5021714438964509, |
| "grad_norm": 0.00020592297369148582, |
| "learning_rate": 4.360562777855192e-06, |
| "loss": 0.0, |
| "step": 941000 |
| }, |
| { |
| "epoch": 1.5029696221344404, |
| "grad_norm": 0.00018462153093423694, |
| "learning_rate": 4.3473088463544155e-06, |
| "loss": 0.0, |
| "step": 941500 |
| }, |
| { |
| "epoch": 1.50376780037243, |
| "grad_norm": 0.0002245952346129343, |
| "learning_rate": 4.334071673780505e-06, |
| "loss": 0.0, |
| "step": 942000 |
| }, |
| { |
| "epoch": 1.5045659786104197, |
| "grad_norm": 0.0001867782702902332, |
| "learning_rate": 4.320851280958325e-06, |
| "loss": 0.0, |
| "step": 942500 |
| }, |
| { |
| "epoch": 1.505364156848409, |
| "grad_norm": 0.00026211480144411325, |
| "learning_rate": 4.307647688686331e-06, |
| "loss": 0.0, |
| "step": 943000 |
| }, |
| { |
| "epoch": 1.5061623350863989, |
| "grad_norm": 0.003524922300130129, |
| "learning_rate": 4.294460917736556e-06, |
| "loss": 0.0, |
| "step": 943500 |
| }, |
| { |
| "epoch": 1.5069605133243884, |
| "grad_norm": 0.00023060395324137062, |
| "learning_rate": 4.281290988854572e-06, |
| "loss": 0.0, |
| "step": 944000 |
| }, |
| { |
| "epoch": 1.507758691562378, |
| "grad_norm": 0.00018296600319445133, |
| "learning_rate": 4.268137922759453e-06, |
| "loss": 0.0, |
| "step": 944500 |
| }, |
| { |
| "epoch": 1.5085568698003677, |
| "grad_norm": 0.0002622704196255654, |
| "learning_rate": 4.255001740143732e-06, |
| "loss": 0.0, |
| "step": 945000 |
| }, |
| { |
| "epoch": 1.5093550480383573, |
| "grad_norm": 0.0001771461102180183, |
| "learning_rate": 4.2418824616733995e-06, |
| "loss": 0.0, |
| "step": 945500 |
| }, |
| { |
| "epoch": 1.5101532262763468, |
| "grad_norm": 0.0009799576364457607, |
| "learning_rate": 4.228780107987845e-06, |
| "loss": 0.0, |
| "step": 946000 |
| }, |
| { |
| "epoch": 1.5109514045143366, |
| "grad_norm": 0.00039355512126348913, |
| "learning_rate": 4.215694699699823e-06, |
| "loss": 0.0, |
| "step": 946500 |
| }, |
| { |
| "epoch": 1.511749582752326, |
| "grad_norm": 0.00020366064563859254, |
| "learning_rate": 4.202626257395441e-06, |
| "loss": 0.0, |
| "step": 947000 |
| }, |
| { |
| "epoch": 1.5125477609903157, |
| "grad_norm": 0.0002536962565500289, |
| "learning_rate": 4.189574801634108e-06, |
| "loss": 0.0, |
| "step": 947500 |
| }, |
| { |
| "epoch": 1.5133459392283053, |
| "grad_norm": 0.00019765354227274656, |
| "learning_rate": 4.176540352948524e-06, |
| "loss": 0.0, |
| "step": 948000 |
| }, |
| { |
| "epoch": 1.5141441174662948, |
| "grad_norm": 0.00039374298648908734, |
| "learning_rate": 4.1635229318446124e-06, |
| "loss": 0.0, |
| "step": 948500 |
| }, |
| { |
| "epoch": 1.5149422957042846, |
| "grad_norm": 0.00021904372260905802, |
| "learning_rate": 4.150522558801511e-06, |
| "loss": 0.0, |
| "step": 949000 |
| }, |
| { |
| "epoch": 1.5157404739422742, |
| "grad_norm": 0.00021933818061370403, |
| "learning_rate": 4.137539254271564e-06, |
| "loss": 0.0, |
| "step": 949500 |
| }, |
| { |
| "epoch": 1.5165386521802637, |
| "grad_norm": 0.010109743103384972, |
| "learning_rate": 4.124573038680228e-06, |
| "loss": 0.0, |
| "step": 950000 |
| }, |
| { |
| "epoch": 1.5173368304182535, |
| "grad_norm": 0.00024012771609704942, |
| "learning_rate": 4.111623932426095e-06, |
| "loss": 0.0, |
| "step": 950500 |
| }, |
| { |
| "epoch": 1.5181350086562428, |
| "grad_norm": 0.0003029144718311727, |
| "learning_rate": 4.0986919558808405e-06, |
| "loss": 0.0, |
| "step": 951000 |
| }, |
| { |
| "epoch": 1.5189331868942326, |
| "grad_norm": 0.0001967994321603328, |
| "learning_rate": 4.085777129389188e-06, |
| "loss": 0.0, |
| "step": 951500 |
| }, |
| { |
| "epoch": 1.5197313651322222, |
| "grad_norm": 0.00030280096689239144, |
| "learning_rate": 4.072879473268879e-06, |
| "loss": 0.0, |
| "step": 952000 |
| }, |
| { |
| "epoch": 1.5205295433702117, |
| "grad_norm": 0.0005742062348872423, |
| "learning_rate": 4.05999900781063e-06, |
| "loss": 0.0, |
| "step": 952500 |
| }, |
| { |
| "epoch": 1.5213277216082015, |
| "grad_norm": 0.00022738358529750258, |
| "learning_rate": 4.047135753278146e-06, |
| "loss": 0.0, |
| "step": 953000 |
| }, |
| { |
| "epoch": 1.522125899846191, |
| "grad_norm": 0.0001853039429988712, |
| "learning_rate": 4.034289729908022e-06, |
| "loss": 0.0, |
| "step": 953500 |
| }, |
| { |
| "epoch": 1.5229240780841806, |
| "grad_norm": 0.0002849490556400269, |
| "learning_rate": 4.021460957909764e-06, |
| "loss": 0.0, |
| "step": 954000 |
| }, |
| { |
| "epoch": 1.5237222563221704, |
| "grad_norm": 0.0005084871663711965, |
| "learning_rate": 4.008649457465735e-06, |
| "loss": 0.0, |
| "step": 954500 |
| }, |
| { |
| "epoch": 1.5245204345601597, |
| "grad_norm": 0.00028648623265326023, |
| "learning_rate": 3.995855248731117e-06, |
| "loss": 0.0, |
| "step": 955000 |
| }, |
| { |
| "epoch": 1.5253186127981495, |
| "grad_norm": 0.0002624321496114135, |
| "learning_rate": 3.9830783518339005e-06, |
| "loss": 0.0, |
| "step": 955500 |
| }, |
| { |
| "epoch": 1.526116791036139, |
| "grad_norm": 0.0007837973535060883, |
| "learning_rate": 3.970318786874825e-06, |
| "loss": 0.0, |
| "step": 956000 |
| }, |
| { |
| "epoch": 1.5269149692741286, |
| "grad_norm": 0.0002491927589289844, |
| "learning_rate": 3.95757657392739e-06, |
| "loss": 0.0, |
| "step": 956500 |
| }, |
| { |
| "epoch": 1.5277131475121184, |
| "grad_norm": 0.0007153134210966527, |
| "learning_rate": 3.944851733037768e-06, |
| "loss": 0.0, |
| "step": 957000 |
| }, |
| { |
| "epoch": 1.528511325750108, |
| "grad_norm": 0.00027355499332770705, |
| "learning_rate": 3.93214428422482e-06, |
| "loss": 0.0, |
| "step": 957500 |
| }, |
| { |
| "epoch": 1.5293095039880975, |
| "grad_norm": 0.0003587014216464013, |
| "learning_rate": 3.919454247480034e-06, |
| "loss": 0.0, |
| "step": 958000 |
| }, |
| { |
| "epoch": 1.5301076822260873, |
| "grad_norm": 0.0002984220045618713, |
| "learning_rate": 3.906781642767514e-06, |
| "loss": 0.0, |
| "step": 958500 |
| }, |
| { |
| "epoch": 1.5309058604640768, |
| "grad_norm": 0.04637977480888367, |
| "learning_rate": 3.8941264900239396e-06, |
| "loss": 0.0, |
| "step": 959000 |
| }, |
| { |
| "epoch": 1.5317040387020664, |
| "grad_norm": 0.0003292471228633076, |
| "learning_rate": 3.881488809158518e-06, |
| "loss": 0.0, |
| "step": 959500 |
| }, |
| { |
| "epoch": 1.5325022169400562, |
| "grad_norm": 0.000271444208920002, |
| "learning_rate": 3.8688686200530035e-06, |
| "loss": 0.0, |
| "step": 960000 |
| }, |
| { |
| "epoch": 1.5333003951780455, |
| "grad_norm": 0.0002163940080208704, |
| "learning_rate": 3.856265942561596e-06, |
| "loss": 0.0, |
| "step": 960500 |
| }, |
| { |
| "epoch": 1.5340985734160353, |
| "grad_norm": 0.00026649428764358163, |
| "learning_rate": 3.843680796510972e-06, |
| "loss": 0.0, |
| "step": 961000 |
| }, |
| { |
| "epoch": 1.5348967516540248, |
| "grad_norm": 0.0002405370760243386, |
| "learning_rate": 3.831113201700205e-06, |
| "loss": 0.0, |
| "step": 961500 |
| }, |
| { |
| "epoch": 1.5356949298920144, |
| "grad_norm": 0.0004005729279015213, |
| "learning_rate": 3.818563177900777e-06, |
| "loss": 0.0, |
| "step": 962000 |
| }, |
| { |
| "epoch": 1.5364931081300042, |
| "grad_norm": 0.0006678230129182339, |
| "learning_rate": 3.8060307448565195e-06, |
| "loss": 0.0, |
| "step": 962500 |
| }, |
| { |
| "epoch": 1.5372912863679937, |
| "grad_norm": 0.0003191915457136929, |
| "learning_rate": 3.7935159222835787e-06, |
| "loss": 0.0, |
| "step": 963000 |
| }, |
| { |
| "epoch": 1.5380894646059833, |
| "grad_norm": 0.0002546892501413822, |
| "learning_rate": 3.781018729870423e-06, |
| "loss": 0.0, |
| "step": 963500 |
| }, |
| { |
| "epoch": 1.538887642843973, |
| "grad_norm": 0.000205778909730725, |
| "learning_rate": 3.7685391872777607e-06, |
| "loss": 0.0, |
| "step": 964000 |
| }, |
| { |
| "epoch": 1.5396858210819624, |
| "grad_norm": 0.00045933053479529917, |
| "learning_rate": 3.756077314138534e-06, |
| "loss": 0.0, |
| "step": 964500 |
| }, |
| { |
| "epoch": 1.5404839993199522, |
| "grad_norm": 0.0007948831771500409, |
| "learning_rate": 3.7436331300579004e-06, |
| "loss": 0.0, |
| "step": 965000 |
| }, |
| { |
| "epoch": 1.5412821775579417, |
| "grad_norm": 0.0002442169061396271, |
| "learning_rate": 3.731206654613181e-06, |
| "loss": 0.0, |
| "step": 965500 |
| }, |
| { |
| "epoch": 1.5420803557959313, |
| "grad_norm": 0.00023657285782974213, |
| "learning_rate": 3.718797907353844e-06, |
| "loss": 0.0, |
| "step": 966000 |
| }, |
| { |
| "epoch": 1.542878534033921, |
| "grad_norm": 0.0002562176960054785, |
| "learning_rate": 3.7064069078014532e-06, |
| "loss": 0.0, |
| "step": 966500 |
| }, |
| { |
| "epoch": 1.5436767122719106, |
| "grad_norm": 0.00023075290664564818, |
| "learning_rate": 3.694033675449667e-06, |
| "loss": 0.0, |
| "step": 967000 |
| }, |
| { |
| "epoch": 1.5444748905099002, |
| "grad_norm": 0.00022441007604356855, |
| "learning_rate": 3.6816782297641884e-06, |
| "loss": 0.0, |
| "step": 967500 |
| }, |
| { |
| "epoch": 1.54527306874789, |
| "grad_norm": 0.28657254576683044, |
| "learning_rate": 3.6693405901827277e-06, |
| "loss": 0.0, |
| "step": 968000 |
| }, |
| { |
| "epoch": 1.5460712469858793, |
| "grad_norm": 0.0003447630733717233, |
| "learning_rate": 3.657020776114994e-06, |
| "loss": 0.0, |
| "step": 968500 |
| }, |
| { |
| "epoch": 1.546869425223869, |
| "grad_norm": 0.00013505498645827174, |
| "learning_rate": 3.6447188069426514e-06, |
| "loss": 0.0, |
| "step": 969000 |
| }, |
| { |
| "epoch": 1.5476676034618586, |
| "grad_norm": 0.00015060935402289033, |
| "learning_rate": 3.6324347020192904e-06, |
| "loss": 0.0, |
| "step": 969500 |
| }, |
| { |
| "epoch": 1.5484657816998482, |
| "grad_norm": 0.00021117663709446788, |
| "learning_rate": 3.6201684806703894e-06, |
| "loss": 0.0, |
| "step": 970000 |
| }, |
| { |
| "epoch": 1.549263959937838, |
| "grad_norm": 0.00036845364957116544, |
| "learning_rate": 3.6079201621933017e-06, |
| "loss": 0.0, |
| "step": 970500 |
| }, |
| { |
| "epoch": 1.5500621381758275, |
| "grad_norm": 0.00026166459429077804, |
| "learning_rate": 3.5956897658572136e-06, |
| "loss": 0.0, |
| "step": 971000 |
| }, |
| { |
| "epoch": 1.550860316413817, |
| "grad_norm": 0.0002735615707933903, |
| "learning_rate": 3.583477310903109e-06, |
| "loss": 0.0, |
| "step": 971500 |
| }, |
| { |
| "epoch": 1.5516584946518068, |
| "grad_norm": 0.00018594361608847976, |
| "learning_rate": 3.5712828165437557e-06, |
| "loss": 0.0, |
| "step": 972000 |
| }, |
| { |
| "epoch": 1.5524566728897962, |
| "grad_norm": 0.00013815864804200828, |
| "learning_rate": 3.559106301963661e-06, |
| "loss": 0.0, |
| "step": 972500 |
| }, |
| { |
| "epoch": 1.553254851127786, |
| "grad_norm": 0.001760584069415927, |
| "learning_rate": 3.5469477863190504e-06, |
| "loss": 0.0, |
| "step": 973000 |
| }, |
| { |
| "epoch": 1.5540530293657757, |
| "grad_norm": 0.00026064462144859135, |
| "learning_rate": 3.534807288737824e-06, |
| "loss": 0.0, |
| "step": 973500 |
| }, |
| { |
| "epoch": 1.554851207603765, |
| "grad_norm": 0.0005155335529707372, |
| "learning_rate": 3.522684828319543e-06, |
| "loss": 0.0, |
| "step": 974000 |
| }, |
| { |
| "epoch": 1.5556493858417548, |
| "grad_norm": 0.00018664480012375861, |
| "learning_rate": 3.510580424135396e-06, |
| "loss": 0.0, |
| "step": 974500 |
| }, |
| { |
| "epoch": 1.5564475640797444, |
| "grad_norm": 0.002446634229272604, |
| "learning_rate": 3.498494095228151e-06, |
| "loss": 0.0, |
| "step": 975000 |
| }, |
| { |
| "epoch": 1.557245742317734, |
| "grad_norm": 0.00043144013034179807, |
| "learning_rate": 3.486425860612157e-06, |
| "loss": 0.0, |
| "step": 975500 |
| }, |
| { |
| "epoch": 1.5580439205557237, |
| "grad_norm": 0.00019767590856645256, |
| "learning_rate": 3.474375739273284e-06, |
| "loss": 0.0, |
| "step": 976000 |
| }, |
| { |
| "epoch": 1.5588420987937133, |
| "grad_norm": 0.00025959816412068903, |
| "learning_rate": 3.4623437501689182e-06, |
| "loss": 0.0, |
| "step": 976500 |
| }, |
| { |
| "epoch": 1.5596402770317028, |
| "grad_norm": 0.00023775137378834188, |
| "learning_rate": 3.4503299122279013e-06, |
| "loss": 0.0, |
| "step": 977000 |
| }, |
| { |
| "epoch": 1.5604384552696926, |
| "grad_norm": 0.0002516016538720578, |
| "learning_rate": 3.4383342443505385e-06, |
| "loss": 0.0, |
| "step": 977500 |
| }, |
| { |
| "epoch": 1.561236633507682, |
| "grad_norm": 0.0003499962331261486, |
| "learning_rate": 3.426356765408545e-06, |
| "loss": 0.0, |
| "step": 978000 |
| }, |
| { |
| "epoch": 1.5620348117456717, |
| "grad_norm": 0.00022499705664813519, |
| "learning_rate": 3.414397494245008e-06, |
| "loss": 0.0, |
| "step": 978500 |
| }, |
| { |
| "epoch": 1.5628329899836613, |
| "grad_norm": 0.0003553772403392941, |
| "learning_rate": 3.4024564496743843e-06, |
| "loss": 0.0, |
| "step": 979000 |
| }, |
| { |
| "epoch": 1.5636311682216508, |
| "grad_norm": 0.00025216786889359355, |
| "learning_rate": 3.3905336504824537e-06, |
| "loss": 0.0, |
| "step": 979500 |
| }, |
| { |
| "epoch": 1.5644293464596406, |
| "grad_norm": 1341.220458984375, |
| "learning_rate": 3.3786291154262935e-06, |
| "loss": 0.0, |
| "step": 980000 |
| }, |
| { |
| "epoch": 1.5652275246976302, |
| "grad_norm": 0.003939106594771147, |
| "learning_rate": 3.3667428632342373e-06, |
| "loss": 0.0, |
| "step": 980500 |
| }, |
| { |
| "epoch": 1.5660257029356197, |
| "grad_norm": 0.00019107607658952475, |
| "learning_rate": 3.354874912605866e-06, |
| "loss": 0.0, |
| "step": 981000 |
| }, |
| { |
| "epoch": 1.5668238811736095, |
| "grad_norm": 0.0002252194390166551, |
| "learning_rate": 3.343025282211972e-06, |
| "loss": 0.0, |
| "step": 981500 |
| }, |
| { |
| "epoch": 1.5676220594115988, |
| "grad_norm": 0.00031818528077565134, |
| "learning_rate": 3.3311939906945094e-06, |
| "loss": 0.0, |
| "step": 982000 |
| }, |
| { |
| "epoch": 1.5684202376495886, |
| "grad_norm": 0.0002334948512725532, |
| "learning_rate": 3.319381056666595e-06, |
| "loss": 0.0, |
| "step": 982500 |
| }, |
| { |
| "epoch": 1.5692184158875782, |
| "grad_norm": 0.003088391851633787, |
| "learning_rate": 3.307586498712468e-06, |
| "loss": 0.0, |
| "step": 983000 |
| }, |
| { |
| "epoch": 1.5700165941255677, |
| "grad_norm": 0.00014293832646217197, |
| "learning_rate": 3.2958103353874445e-06, |
| "loss": 0.0, |
| "step": 983500 |
| }, |
| { |
| "epoch": 1.5708147723635575, |
| "grad_norm": 0.0005570229259319603, |
| "learning_rate": 3.2840525852179165e-06, |
| "loss": 0.0, |
| "step": 984000 |
| }, |
| { |
| "epoch": 1.571612950601547, |
| "grad_norm": 0.00026708198129199445, |
| "learning_rate": 3.272313266701291e-06, |
| "loss": 0.0, |
| "step": 984500 |
| }, |
| { |
| "epoch": 1.5724111288395366, |
| "grad_norm": 0.0002196293353335932, |
| "learning_rate": 3.260592398306002e-06, |
| "loss": 0.0, |
| "step": 985000 |
| }, |
| { |
| "epoch": 1.5732093070775264, |
| "grad_norm": 0.04503238573670387, |
| "learning_rate": 3.2488899984714326e-06, |
| "loss": 0.0, |
| "step": 985500 |
| }, |
| { |
| "epoch": 1.5740074853155157, |
| "grad_norm": 0.00016815183334983885, |
| "learning_rate": 3.2372060856079287e-06, |
| "loss": 0.0, |
| "step": 986000 |
| }, |
| { |
| "epoch": 1.5748056635535055, |
| "grad_norm": 0.00018650360289029777, |
| "learning_rate": 3.2255406780967488e-06, |
| "loss": 0.0, |
| "step": 986500 |
| }, |
| { |
| "epoch": 1.575603841791495, |
| "grad_norm": 0.00018975707644131035, |
| "learning_rate": 3.213893794290029e-06, |
| "loss": 0.0, |
| "step": 987000 |
| }, |
| { |
| "epoch": 1.5764020200294846, |
| "grad_norm": 0.00021380094403866678, |
| "learning_rate": 3.2022654525107764e-06, |
| "loss": 0.0, |
| "step": 987500 |
| }, |
| { |
| "epoch": 1.5772001982674744, |
| "grad_norm": 0.0003533356648404151, |
| "learning_rate": 3.1906556710528117e-06, |
| "loss": 0.0, |
| "step": 988000 |
| }, |
| { |
| "epoch": 1.577998376505464, |
| "grad_norm": 0.0001912272855406627, |
| "learning_rate": 3.179064468180782e-06, |
| "loss": 0.0, |
| "step": 988500 |
| }, |
| { |
| "epoch": 1.5787965547434535, |
| "grad_norm": 0.00029489348526112735, |
| "learning_rate": 3.1674918621300764e-06, |
| "loss": 0.0, |
| "step": 989000 |
| }, |
| { |
| "epoch": 1.5795947329814433, |
| "grad_norm": 0.00020246152416802943, |
| "learning_rate": 3.1559378711068502e-06, |
| "loss": 0.0, |
| "step": 989500 |
| }, |
| { |
| "epoch": 1.5803929112194328, |
| "grad_norm": 0.0003821216232609004, |
| "learning_rate": 3.1444025132879654e-06, |
| "loss": 0.0, |
| "step": 990000 |
| }, |
| { |
| "epoch": 1.5811910894574224, |
| "grad_norm": 0.00018945671035908163, |
| "learning_rate": 3.132885806820962e-06, |
| "loss": 0.0, |
| "step": 990500 |
| }, |
| { |
| "epoch": 1.5819892676954121, |
| "grad_norm": 0.00016204801795538515, |
| "learning_rate": 3.1213877698240532e-06, |
| "loss": 0.0, |
| "step": 991000 |
| }, |
| { |
| "epoch": 1.5827874459334015, |
| "grad_norm": 0.000292215176159516, |
| "learning_rate": 3.1099084203860616e-06, |
| "loss": 0.0, |
| "step": 991500 |
| }, |
| { |
| "epoch": 1.5835856241713913, |
| "grad_norm": 0.0001693951344350353, |
| "learning_rate": 3.098447776566436e-06, |
| "loss": 0.0, |
| "step": 992000 |
| }, |
| { |
| "epoch": 1.5843838024093808, |
| "grad_norm": 0.00010817296424647793, |
| "learning_rate": 3.0870058563951768e-06, |
| "loss": 0.0, |
| "step": 992500 |
| }, |
| { |
| "epoch": 1.5851819806473704, |
| "grad_norm": 0.00021646858658641577, |
| "learning_rate": 3.0755826778728306e-06, |
| "loss": 0.0, |
| "step": 993000 |
| }, |
| { |
| "epoch": 1.5859801588853601, |
| "grad_norm": 0.0001887698017526418, |
| "learning_rate": 3.0641782589704655e-06, |
| "loss": 0.0, |
| "step": 993500 |
| }, |
| { |
| "epoch": 1.5867783371233497, |
| "grad_norm": 0.00021660560742020607, |
| "learning_rate": 3.052792617629634e-06, |
| "loss": 0.0, |
| "step": 994000 |
| }, |
| { |
| "epoch": 1.5875765153613393, |
| "grad_norm": 9.980924369301647e-05, |
| "learning_rate": 3.041425771762355e-06, |
| "loss": 0.0, |
| "step": 994500 |
| }, |
| { |
| "epoch": 1.588374693599329, |
| "grad_norm": 0.00025837685097940266, |
| "learning_rate": 3.0300777392510557e-06, |
| "loss": 0.0, |
| "step": 995000 |
| }, |
| { |
| "epoch": 1.5891728718373184, |
| "grad_norm": 0.00017442693933844566, |
| "learning_rate": 3.018748537948599e-06, |
| "loss": 0.0, |
| "step": 995500 |
| }, |
| { |
| "epoch": 1.5899710500753081, |
| "grad_norm": 0.00025473537971265614, |
| "learning_rate": 3.0074381856781974e-06, |
| "loss": 0.0, |
| "step": 996000 |
| }, |
| { |
| "epoch": 1.5907692283132977, |
| "grad_norm": 0.00018088742217514664, |
| "learning_rate": 2.9961467002334126e-06, |
| "loss": 0.0, |
| "step": 996500 |
| }, |
| { |
| "epoch": 1.5915674065512873, |
| "grad_norm": 0.00037461461033672094, |
| "learning_rate": 2.9848740993781313e-06, |
| "loss": 0.0, |
| "step": 997000 |
| }, |
| { |
| "epoch": 1.592365584789277, |
| "grad_norm": 0.002994926879182458, |
| "learning_rate": 2.9736204008465333e-06, |
| "loss": 0.0, |
| "step": 997500 |
| }, |
| { |
| "epoch": 1.5931637630272666, |
| "grad_norm": 0.00024818425299599767, |
| "learning_rate": 2.962385622343058e-06, |
| "loss": 0.0, |
| "step": 998000 |
| }, |
| { |
| "epoch": 1.5939619412652561, |
| "grad_norm": 0.0005606827326118946, |
| "learning_rate": 2.9511697815423698e-06, |
| "loss": 0.0, |
| "step": 998500 |
| }, |
| { |
| "epoch": 1.594760119503246, |
| "grad_norm": 0.0002748910628724843, |
| "learning_rate": 2.9399728960893537e-06, |
| "loss": 0.0, |
| "step": 999000 |
| }, |
| { |
| "epoch": 1.5955582977412353, |
| "grad_norm": 0.00022624792472925037, |
| "learning_rate": 2.928794983599071e-06, |
| "loss": 0.0, |
| "step": 999500 |
| }, |
| { |
| "epoch": 1.596356475979225, |
| "grad_norm": 0.0002598523278720677, |
| "learning_rate": 2.9176360616567267e-06, |
| "loss": 0.0, |
| "step": 1000000 |
| }, |
| { |
| "epoch": 1.5971546542172146, |
| "grad_norm": 0.0001525416737422347, |
| "learning_rate": 2.9064961478176584e-06, |
| "loss": 0.0, |
| "step": 1000500 |
| }, |
| { |
| "epoch": 1.5979528324552041, |
| "grad_norm": 0.00017040724924299866, |
| "learning_rate": 2.8953752596072976e-06, |
| "loss": 0.0, |
| "step": 1001000 |
| }, |
| { |
| "epoch": 1.598751010693194, |
| "grad_norm": 0.00020649759972002357, |
| "learning_rate": 2.884273414521146e-06, |
| "loss": 0.0, |
| "step": 1001500 |
| }, |
| { |
| "epoch": 1.5995491889311835, |
| "grad_norm": 0.000152139225974679, |
| "learning_rate": 2.8731906300247376e-06, |
| "loss": 0.0, |
| "step": 1002000 |
| }, |
| { |
| "epoch": 1.600347367169173, |
| "grad_norm": 0.00011175717372680083, |
| "learning_rate": 2.86212692355363e-06, |
| "loss": 0.0, |
| "step": 1002500 |
| }, |
| { |
| "epoch": 1.6011455454071628, |
| "grad_norm": 0.00022403241018764675, |
| "learning_rate": 2.851082312513368e-06, |
| "loss": 0.0, |
| "step": 1003000 |
| }, |
| { |
| "epoch": 1.6019437236451521, |
| "grad_norm": 0.0021336127538233995, |
| "learning_rate": 2.840056814279443e-06, |
| "loss": 0.0, |
| "step": 1003500 |
| }, |
| { |
| "epoch": 1.602741901883142, |
| "grad_norm": 0.00018766756693366915, |
| "learning_rate": 2.829050446197291e-06, |
| "loss": 0.0, |
| "step": 1004000 |
| }, |
| { |
| "epoch": 1.6035400801211317, |
| "grad_norm": 0.00015176778833847493, |
| "learning_rate": 2.818063225582246e-06, |
| "loss": 0.0, |
| "step": 1004500 |
| }, |
| { |
| "epoch": 1.604338258359121, |
| "grad_norm": 0.00027044734451919794, |
| "learning_rate": 2.8070951697195222e-06, |
| "loss": 0.0, |
| "step": 1005000 |
| }, |
| { |
| "epoch": 1.6051364365971108, |
| "grad_norm": 0.0003155279264319688, |
| "learning_rate": 2.7961462958641766e-06, |
| "loss": 0.0, |
| "step": 1005500 |
| }, |
| { |
| "epoch": 1.6059346148351004, |
| "grad_norm": 0.0001946605771081522, |
| "learning_rate": 2.785216621241098e-06, |
| "loss": 0.0, |
| "step": 1006000 |
| }, |
| { |
| "epoch": 1.60673279307309, |
| "grad_norm": 0.00022992221056483686, |
| "learning_rate": 2.774306163044969e-06, |
| "loss": 0.0, |
| "step": 1006500 |
| }, |
| { |
| "epoch": 1.6075309713110797, |
| "grad_norm": 0.000249813572736457, |
| "learning_rate": 2.7634149384402296e-06, |
| "loss": 0.0, |
| "step": 1007000 |
| }, |
| { |
| "epoch": 1.6083291495490692, |
| "grad_norm": 0.000225092371692881, |
| "learning_rate": 2.752542964561077e-06, |
| "loss": 0.0, |
| "step": 1007500 |
| }, |
| { |
| "epoch": 1.6091273277870588, |
| "grad_norm": 0.0002945462183561176, |
| "learning_rate": 2.7416902585114135e-06, |
| "loss": 0.0, |
| "step": 1008000 |
| }, |
| { |
| "epoch": 1.6099255060250486, |
| "grad_norm": 0.0001682073052506894, |
| "learning_rate": 2.7308568373648357e-06, |
| "loss": 0.0, |
| "step": 1008500 |
| }, |
| { |
| "epoch": 1.610723684263038, |
| "grad_norm": 0.22438447177410126, |
| "learning_rate": 2.7200427181645895e-06, |
| "loss": 0.0, |
| "step": 1009000 |
| }, |
| { |
| "epoch": 1.6115218625010277, |
| "grad_norm": 0.0002828448486980051, |
| "learning_rate": 2.7092479179235652e-06, |
| "loss": 0.0, |
| "step": 1009500 |
| }, |
| { |
| "epoch": 1.6123200407390172, |
| "grad_norm": 0.0001349742669845, |
| "learning_rate": 2.6984724536242637e-06, |
| "loss": 0.0, |
| "step": 1010000 |
| }, |
| { |
| "epoch": 1.6131182189770068, |
| "grad_norm": 0.0002548525226302445, |
| "learning_rate": 2.6877163422187483e-06, |
| "loss": 0.0, |
| "step": 1010500 |
| }, |
| { |
| "epoch": 1.6139163972149966, |
| "grad_norm": 0.00014282946358434856, |
| "learning_rate": 2.6769796006286544e-06, |
| "loss": 0.0, |
| "step": 1011000 |
| }, |
| { |
| "epoch": 1.6147145754529861, |
| "grad_norm": 0.00016806498751975596, |
| "learning_rate": 2.6662622457451408e-06, |
| "loss": 0.0, |
| "step": 1011500 |
| }, |
| { |
| "epoch": 1.6155127536909757, |
| "grad_norm": 0.00019143502868246287, |
| "learning_rate": 2.6555642944288565e-06, |
| "loss": 0.0, |
| "step": 1012000 |
| }, |
| { |
| "epoch": 1.6163109319289655, |
| "grad_norm": 0.00015487658674828708, |
| "learning_rate": 2.644885763509936e-06, |
| "loss": 0.0, |
| "step": 1012500 |
| }, |
| { |
| "epoch": 1.6171091101669548, |
| "grad_norm": 0.008766920305788517, |
| "learning_rate": 2.6342266697879573e-06, |
| "loss": 0.0, |
| "step": 1013000 |
| }, |
| { |
| "epoch": 1.6179072884049446, |
| "grad_norm": 0.00031943133217282593, |
| "learning_rate": 2.6235870300319237e-06, |
| "loss": 0.0, |
| "step": 1013500 |
| }, |
| { |
| "epoch": 1.6187054666429341, |
| "grad_norm": 0.00034817136474885046, |
| "learning_rate": 2.612966860980222e-06, |
| "loss": 0.0, |
| "step": 1014000 |
| }, |
| { |
| "epoch": 1.6195036448809237, |
| "grad_norm": 0.00019778979185502976, |
| "learning_rate": 2.6023661793406196e-06, |
| "loss": 0.0, |
| "step": 1014500 |
| }, |
| { |
| "epoch": 1.6203018231189135, |
| "grad_norm": 0.00024411575577687472, |
| "learning_rate": 2.5917850017902225e-06, |
| "loss": 0.0, |
| "step": 1015000 |
| }, |
| { |
| "epoch": 1.621100001356903, |
| "grad_norm": 0.00018817426462192088, |
| "learning_rate": 2.5812233449754465e-06, |
| "loss": 0.0, |
| "step": 1015500 |
| }, |
| { |
| "epoch": 1.6218981795948926, |
| "grad_norm": 0.00015945191262289882, |
| "learning_rate": 2.570681225512007e-06, |
| "loss": 0.0, |
| "step": 1016000 |
| }, |
| { |
| "epoch": 1.6226963578328824, |
| "grad_norm": 0.00012362716370262206, |
| "learning_rate": 2.5601586599848746e-06, |
| "loss": 0.0, |
| "step": 1016500 |
| }, |
| { |
| "epoch": 1.6234945360708717, |
| "grad_norm": 0.00016100883658509701, |
| "learning_rate": 2.5496556649482687e-06, |
| "loss": 0.0, |
| "step": 1017000 |
| }, |
| { |
| "epoch": 1.6242927143088615, |
| "grad_norm": 0.0001855713635450229, |
| "learning_rate": 2.539172256925602e-06, |
| "loss": 0.0, |
| "step": 1017500 |
| }, |
| { |
| "epoch": 1.625090892546851, |
| "grad_norm": 0.00019156381313223392, |
| "learning_rate": 2.52870845240949e-06, |
| "loss": 0.0, |
| "step": 1018000 |
| }, |
| { |
| "epoch": 1.6258890707848406, |
| "grad_norm": 0.00016442120249848813, |
| "learning_rate": 2.518264267861703e-06, |
| "loss": 0.0, |
| "step": 1018500 |
| }, |
| { |
| "epoch": 1.6266872490228304, |
| "grad_norm": 0.00023895353660918772, |
| "learning_rate": 2.507839719713134e-06, |
| "loss": 0.0, |
| "step": 1019000 |
| }, |
| { |
| "epoch": 1.62748542726082, |
| "grad_norm": 0.00017404610116500407, |
| "learning_rate": 2.497434824363805e-06, |
| "loss": 0.0, |
| "step": 1019500 |
| }, |
| { |
| "epoch": 1.6282836054988095, |
| "grad_norm": 0.0001925264805322513, |
| "learning_rate": 2.4870495981827933e-06, |
| "loss": 0.0, |
| "step": 1020000 |
| }, |
| { |
| "epoch": 1.6290817837367992, |
| "grad_norm": 0.00015991131658665836, |
| "learning_rate": 2.4766840575082617e-06, |
| "loss": 0.0, |
| "step": 1020500 |
| }, |
| { |
| "epoch": 1.6298799619747888, |
| "grad_norm": 0.00015943833568599075, |
| "learning_rate": 2.466338218647384e-06, |
| "loss": 0.0, |
| "step": 1021000 |
| }, |
| { |
| "epoch": 1.6306781402127783, |
| "grad_norm": 0.00015321993851102889, |
| "learning_rate": 2.4560120978763335e-06, |
| "loss": 0.0, |
| "step": 1021500 |
| }, |
| { |
| "epoch": 1.6314763184507681, |
| "grad_norm": 0.00015020738646853715, |
| "learning_rate": 2.4457057114402892e-06, |
| "loss": 0.0, |
| "step": 1022000 |
| }, |
| { |
| "epoch": 1.6322744966887575, |
| "grad_norm": 0.00021532770188059658, |
| "learning_rate": 2.435419075553358e-06, |
| "loss": 0.0, |
| "step": 1022500 |
| }, |
| { |
| "epoch": 1.6330726749267472, |
| "grad_norm": 0.00013064758968539536, |
| "learning_rate": 2.4251522063985893e-06, |
| "loss": 0.0, |
| "step": 1023000 |
| }, |
| { |
| "epoch": 1.6338708531647368, |
| "grad_norm": 0.00013219025277066976, |
| "learning_rate": 2.4149051201279213e-06, |
| "loss": 0.0, |
| "step": 1023500 |
| }, |
| { |
| "epoch": 1.6346690314027263, |
| "grad_norm": 0.00017550366465002298, |
| "learning_rate": 2.4046778328621945e-06, |
| "loss": 0.0, |
| "step": 1024000 |
| }, |
| { |
| "epoch": 1.6354672096407161, |
| "grad_norm": 0.00023099327518139035, |
| "learning_rate": 2.3944703606910757e-06, |
| "loss": 0.0, |
| "step": 1024500 |
| }, |
| { |
| "epoch": 1.6362653878787057, |
| "grad_norm": 0.00028397998539730906, |
| "learning_rate": 2.3842827196730633e-06, |
| "loss": 0.0, |
| "step": 1025000 |
| }, |
| { |
| "epoch": 1.6370635661166952, |
| "grad_norm": 0.00014657012070529163, |
| "learning_rate": 2.3741149258354766e-06, |
| "loss": 0.0, |
| "step": 1025500 |
| }, |
| { |
| "epoch": 1.637861744354685, |
| "grad_norm": 0.00023401924408972263, |
| "learning_rate": 2.363966995174387e-06, |
| "loss": 0.0, |
| "step": 1026000 |
| }, |
| { |
| "epoch": 1.6386599225926743, |
| "grad_norm": 0.00034546665847301483, |
| "learning_rate": 2.353838943654632e-06, |
| "loss": 0.0, |
| "step": 1026500 |
| }, |
| { |
| "epoch": 1.6394581008306641, |
| "grad_norm": 0.00020099164976272732, |
| "learning_rate": 2.3437307872097597e-06, |
| "loss": 0.0, |
| "step": 1027000 |
| }, |
| { |
| "epoch": 1.6402562790686537, |
| "grad_norm": 0.0002026653237408027, |
| "learning_rate": 2.333642541742044e-06, |
| "loss": 0.0, |
| "step": 1027500 |
| }, |
| { |
| "epoch": 1.6410544573066432, |
| "grad_norm": 0.00018300658848602325, |
| "learning_rate": 2.323574223122414e-06, |
| "loss": 0.0, |
| "step": 1028000 |
| }, |
| { |
| "epoch": 1.641852635544633, |
| "grad_norm": 0.00023787171812728047, |
| "learning_rate": 2.313525847190448e-06, |
| "loss": 0.0, |
| "step": 1028500 |
| }, |
| { |
| "epoch": 1.6426508137826226, |
| "grad_norm": 0.00020482360559981316, |
| "learning_rate": 2.303497429754365e-06, |
| "loss": 0.0, |
| "step": 1029000 |
| }, |
| { |
| "epoch": 1.6434489920206121, |
| "grad_norm": 0.0002030259493039921, |
| "learning_rate": 2.293488986590976e-06, |
| "loss": 0.0, |
| "step": 1029500 |
| }, |
| { |
| "epoch": 1.644247170258602, |
| "grad_norm": 0.000236693857004866, |
| "learning_rate": 2.2835005334456744e-06, |
| "loss": 0.0, |
| "step": 1030000 |
| }, |
| { |
| "epoch": 1.6450453484965912, |
| "grad_norm": 0.00017397591727785766, |
| "learning_rate": 2.273532086032394e-06, |
| "loss": 0.0, |
| "step": 1030500 |
| }, |
| { |
| "epoch": 1.645843526734581, |
| "grad_norm": 0.00017545593436807394, |
| "learning_rate": 2.2635836600336046e-06, |
| "loss": 0.0, |
| "step": 1031000 |
| }, |
| { |
| "epoch": 1.6466417049725706, |
| "grad_norm": 0.000387836538720876, |
| "learning_rate": 2.2536552711002804e-06, |
| "loss": 0.0, |
| "step": 1031500 |
| }, |
| { |
| "epoch": 1.6474398832105601, |
| "grad_norm": 3.9118099212646484, |
| "learning_rate": 2.243746934851859e-06, |
| "loss": 0.0, |
| "step": 1032000 |
| }, |
| { |
| "epoch": 1.64823806144855, |
| "grad_norm": 1.9383196830749512, |
| "learning_rate": 2.2338586668762464e-06, |
| "loss": 0.0, |
| "step": 1032500 |
| }, |
| { |
| "epoch": 1.6490362396865395, |
| "grad_norm": 0.000305307621601969, |
| "learning_rate": 2.2239904827297695e-06, |
| "loss": 0.0, |
| "step": 1033000 |
| }, |
| { |
| "epoch": 1.649834417924529, |
| "grad_norm": 0.00021406357700470835, |
| "learning_rate": 2.2141423979371645e-06, |
| "loss": 0.0, |
| "step": 1033500 |
| }, |
| { |
| "epoch": 1.6506325961625188, |
| "grad_norm": 0.0006000241846777499, |
| "learning_rate": 2.2043144279915356e-06, |
| "loss": 0.0, |
| "step": 1034000 |
| }, |
| { |
| "epoch": 1.6514307744005081, |
| "grad_norm": 0.00020043583936057985, |
| "learning_rate": 2.194506588354352e-06, |
| "loss": 0.0, |
| "step": 1034500 |
| }, |
| { |
| "epoch": 1.652228952638498, |
| "grad_norm": 0.01158731710165739, |
| "learning_rate": 2.1847188944554176e-06, |
| "loss": 0.0, |
| "step": 1035000 |
| }, |
| { |
| "epoch": 1.6530271308764877, |
| "grad_norm": 0.00017917039804160595, |
| "learning_rate": 2.174951361692825e-06, |
| "loss": 0.0, |
| "step": 1035500 |
| }, |
| { |
| "epoch": 1.653825309114477, |
| "grad_norm": 0.00020505704742390662, |
| "learning_rate": 2.165204005432968e-06, |
| "loss": 0.0, |
| "step": 1036000 |
| }, |
| { |
| "epoch": 1.6546234873524668, |
| "grad_norm": 0.00024057974223978817, |
| "learning_rate": 2.1554768410104898e-06, |
| "loss": 0.0, |
| "step": 1036500 |
| }, |
| { |
| "epoch": 1.6554216655904563, |
| "grad_norm": 0.0001608024467714131, |
| "learning_rate": 2.1457698837282726e-06, |
| "loss": 0.0, |
| "step": 1037000 |
| }, |
| { |
| "epoch": 1.656219843828446, |
| "grad_norm": 0.00022758333943784237, |
| "learning_rate": 2.1360831488573956e-06, |
| "loss": 0.0, |
| "step": 1037500 |
| }, |
| { |
| "epoch": 1.6570180220664357, |
| "grad_norm": 0.002283047651872039, |
| "learning_rate": 2.1264166516371374e-06, |
| "loss": 0.0, |
| "step": 1038000 |
| }, |
| { |
| "epoch": 1.6578162003044252, |
| "grad_norm": 0.00017407875566277653, |
| "learning_rate": 2.11677040727494e-06, |
| "loss": 0.0, |
| "step": 1038500 |
| }, |
| { |
| "epoch": 1.6586143785424148, |
| "grad_norm": 0.00016274578229058534, |
| "learning_rate": 2.107144430946367e-06, |
| "loss": 0.0, |
| "step": 1039000 |
| }, |
| { |
| "epoch": 1.6594125567804046, |
| "grad_norm": 0.00018639072368387133, |
| "learning_rate": 2.097538737795112e-06, |
| "loss": 0.0, |
| "step": 1039500 |
| }, |
| { |
| "epoch": 1.660210735018394, |
| "grad_norm": 0.0005455246428027749, |
| "learning_rate": 2.087953342932958e-06, |
| "loss": 0.0, |
| "step": 1040000 |
| }, |
| { |
| "epoch": 1.6610089132563837, |
| "grad_norm": 0.00024452278739772737, |
| "learning_rate": 2.0783882614397413e-06, |
| "loss": 0.0, |
| "step": 1040500 |
| }, |
| { |
| "epoch": 1.6618070914943732, |
| "grad_norm": 0.0030349683947861195, |
| "learning_rate": 2.068843508363353e-06, |
| "loss": 0.0, |
| "step": 1041000 |
| }, |
| { |
| "epoch": 1.6626052697323628, |
| "grad_norm": 0.00021702187950722873, |
| "learning_rate": 2.059319098719701e-06, |
| "loss": 0.0, |
| "step": 1041500 |
| }, |
| { |
| "epoch": 1.6634034479703526, |
| "grad_norm": 0.00023376916942652315, |
| "learning_rate": 2.0498150474926897e-06, |
| "loss": 0.0, |
| "step": 1042000 |
| }, |
| { |
| "epoch": 1.664201626208342, |
| "grad_norm": 0.00025968361296691, |
| "learning_rate": 2.040331369634189e-06, |
| "loss": 0.0, |
| "step": 1042500 |
| }, |
| { |
| "epoch": 1.6649998044463317, |
| "grad_norm": 0.00019714338122867048, |
| "learning_rate": 2.0308680800640227e-06, |
| "loss": 0.0, |
| "step": 1043000 |
| }, |
| { |
| "epoch": 1.6657979826843214, |
| "grad_norm": 0.00014786337851546705, |
| "learning_rate": 2.021425193669945e-06, |
| "loss": 0.0, |
| "step": 1043500 |
| }, |
| { |
| "epoch": 1.6665961609223108, |
| "grad_norm": 0.00020566130115184933, |
| "learning_rate": 2.0120027253075945e-06, |
| "loss": 0.0, |
| "step": 1044000 |
| }, |
| { |
| "epoch": 1.6673943391603006, |
| "grad_norm": 0.00031280418625101447, |
| "learning_rate": 2.0026006898005033e-06, |
| "loss": 0.0, |
| "step": 1044500 |
| }, |
| { |
| "epoch": 1.66819251739829, |
| "grad_norm": 0.0002165736659662798, |
| "learning_rate": 1.993219101940055e-06, |
| "loss": 0.0, |
| "step": 1045000 |
| }, |
| { |
| "epoch": 1.6689906956362797, |
| "grad_norm": 0.00018687658302951604, |
| "learning_rate": 1.983857976485464e-06, |
| "loss": 0.0, |
| "step": 1045500 |
| }, |
| { |
| "epoch": 1.6697888738742694, |
| "grad_norm": 0.0002107059262925759, |
| "learning_rate": 1.974517328163748e-06, |
| "loss": 0.0, |
| "step": 1046000 |
| }, |
| { |
| "epoch": 1.670587052112259, |
| "grad_norm": 0.00026908345171250403, |
| "learning_rate": 1.965197171669715e-06, |
| "loss": 0.0, |
| "step": 1046500 |
| }, |
| { |
| "epoch": 1.6713852303502486, |
| "grad_norm": 0.00020733063865918666, |
| "learning_rate": 1.9558975216659407e-06, |
| "loss": 0.0, |
| "step": 1047000 |
| }, |
| { |
| "epoch": 1.6721834085882383, |
| "grad_norm": 0.0003536621225066483, |
| "learning_rate": 1.946618392782725e-06, |
| "loss": 0.0, |
| "step": 1047500 |
| }, |
| { |
| "epoch": 1.6729815868262277, |
| "grad_norm": 0.0002418523363303393, |
| "learning_rate": 1.937359799618094e-06, |
| "loss": 0.0, |
| "step": 1048000 |
| }, |
| { |
| "epoch": 1.6737797650642174, |
| "grad_norm": 0.0007535194745287299, |
| "learning_rate": 1.928121756737766e-06, |
| "loss": 0.0, |
| "step": 1048500 |
| }, |
| { |
| "epoch": 1.674577943302207, |
| "grad_norm": 0.00025971242575906217, |
| "learning_rate": 1.918904278675132e-06, |
| "loss": 0.0, |
| "step": 1049000 |
| }, |
| { |
| "epoch": 1.6753761215401966, |
| "grad_norm": 0.0002389907167525962, |
| "learning_rate": 1.9097073799312237e-06, |
| "loss": 0.0, |
| "step": 1049500 |
| }, |
| { |
| "epoch": 1.6761742997781863, |
| "grad_norm": 0.00017279484018217772, |
| "learning_rate": 1.9005310749746907e-06, |
| "loss": 0.0, |
| "step": 1050000 |
| }, |
| { |
| "epoch": 1.6761742997781863, |
| "eval_loss": 1.5775514839333482e-05, |
| "eval_runtime": 22138.2637, |
| "eval_samples_per_second": 100.608, |
| "eval_steps_per_second": 3.144, |
| "step": 1050000 |
| }, |
| { |
| "epoch": 1.6769724780161759, |
| "grad_norm": 0.0001552966859890148, |
| "learning_rate": 1.8913753782418087e-06, |
| "loss": 0.0, |
| "step": 1050500 |
| }, |
| { |
| "epoch": 1.6777706562541654, |
| "grad_norm": 0.00013622870028484613, |
| "learning_rate": 1.8822403041364056e-06, |
| "loss": 0.0, |
| "step": 1051000 |
| }, |
| { |
| "epoch": 1.6785688344921552, |
| "grad_norm": 0.002185018267482519, |
| "learning_rate": 1.8731258670298823e-06, |
| "loss": 0.0, |
| "step": 1051500 |
| }, |
| { |
| "epoch": 1.6793670127301448, |
| "grad_norm": 0.0002003060217248276, |
| "learning_rate": 1.8640320812611672e-06, |
| "loss": 0.0, |
| "step": 1052000 |
| }, |
| { |
| "epoch": 1.6801651909681343, |
| "grad_norm": 0.00023303533089347184, |
| "learning_rate": 1.854958961136703e-06, |
| "loss": 0.0, |
| "step": 1052500 |
| }, |
| { |
| "epoch": 1.680963369206124, |
| "grad_norm": 0.00019897008314728737, |
| "learning_rate": 1.8459065209304165e-06, |
| "loss": 0.0, |
| "step": 1053000 |
| }, |
| { |
| "epoch": 1.6817615474441134, |
| "grad_norm": 0.0003193389857187867, |
| "learning_rate": 1.8368747748836963e-06, |
| "loss": 0.0, |
| "step": 1053500 |
| }, |
| { |
| "epoch": 1.6825597256821032, |
| "grad_norm": 0.00824870727956295, |
| "learning_rate": 1.8278637372053925e-06, |
| "loss": 0.0, |
| "step": 1054000 |
| }, |
| { |
| "epoch": 1.6833579039200928, |
| "grad_norm": 0.0001839359028963372, |
| "learning_rate": 1.818873422071759e-06, |
| "loss": 0.0, |
| "step": 1054500 |
| }, |
| { |
| "epoch": 1.6841560821580823, |
| "grad_norm": 0.0003090524405706674, |
| "learning_rate": 1.809903843626457e-06, |
| "loss": 0.0, |
| "step": 1055000 |
| }, |
| { |
| "epoch": 1.684954260396072, |
| "grad_norm": 0.0002031936019193381, |
| "learning_rate": 1.800955015980517e-06, |
| "loss": 0.0, |
| "step": 1055500 |
| }, |
| { |
| "epoch": 1.6857524386340617, |
| "grad_norm": 0.00020773948926944286, |
| "learning_rate": 1.7920269532123395e-06, |
| "loss": 0.0, |
| "step": 1056000 |
| }, |
| { |
| "epoch": 1.6865506168720512, |
| "grad_norm": 0.0002412260655546561, |
| "learning_rate": 1.7831196693676439e-06, |
| "loss": 0.0, |
| "step": 1056500 |
| }, |
| { |
| "epoch": 1.687348795110041, |
| "grad_norm": 0.00022422504844143987, |
| "learning_rate": 1.7742331784594556e-06, |
| "loss": 0.0, |
| "step": 1057000 |
| }, |
| { |
| "epoch": 1.6881469733480303, |
| "grad_norm": 0.00018461488070897758, |
| "learning_rate": 1.7653674944681103e-06, |
| "loss": 0.0, |
| "step": 1057500 |
| }, |
| { |
| "epoch": 1.68894515158602, |
| "grad_norm": 0.0002039273822447285, |
| "learning_rate": 1.756522631341184e-06, |
| "loss": 0.0, |
| "step": 1058000 |
| }, |
| { |
| "epoch": 1.6897433298240097, |
| "grad_norm": 0.0002371125592617318, |
| "learning_rate": 1.74769860299352e-06, |
| "loss": 0.0, |
| "step": 1058500 |
| }, |
| { |
| "epoch": 1.6905415080619992, |
| "grad_norm": 0.00026676972629502416, |
| "learning_rate": 1.7388954233071646e-06, |
| "loss": 0.0, |
| "step": 1059000 |
| }, |
| { |
| "epoch": 1.691339686299989, |
| "grad_norm": 0.00016791919188108295, |
| "learning_rate": 1.730113106131375e-06, |
| "loss": 0.0, |
| "step": 1059500 |
| }, |
| { |
| "epoch": 1.6921378645379785, |
| "grad_norm": 0.00033503255690447986, |
| "learning_rate": 1.721351665282593e-06, |
| "loss": 0.0, |
| "step": 1060000 |
| }, |
| { |
| "epoch": 1.692936042775968, |
| "grad_norm": 0.00023998318647500128, |
| "learning_rate": 1.7126111145444018e-06, |
| "loss": 0.0, |
| "step": 1060500 |
| }, |
| { |
| "epoch": 1.6937342210139579, |
| "grad_norm": 0.00032387388637289405, |
| "learning_rate": 1.703891467667531e-06, |
| "loss": 0.0, |
| "step": 1061000 |
| }, |
| { |
| "epoch": 1.6945323992519472, |
| "grad_norm": 0.00016454454453196377, |
| "learning_rate": 1.6951927383698241e-06, |
| "loss": 0.0, |
| "step": 1061500 |
| }, |
| { |
| "epoch": 1.695330577489937, |
| "grad_norm": 0.00031503697391599417, |
| "learning_rate": 1.6865149403362156e-06, |
| "loss": 0.0, |
| "step": 1062000 |
| }, |
| { |
| "epoch": 1.6961287557279265, |
| "grad_norm": 0.003650024998933077, |
| "learning_rate": 1.6778580872187039e-06, |
| "loss": 0.0, |
| "step": 1062500 |
| }, |
| { |
| "epoch": 1.696926933965916, |
| "grad_norm": 0.00024623217177577317, |
| "learning_rate": 1.6692221926363444e-06, |
| "loss": 0.0, |
| "step": 1063000 |
| }, |
| { |
| "epoch": 1.6977251122039059, |
| "grad_norm": 0.0002650288224685937, |
| "learning_rate": 1.6606072701752229e-06, |
| "loss": 0.0, |
| "step": 1063500 |
| }, |
| { |
| "epoch": 1.6985232904418954, |
| "grad_norm": 0.0002041991101577878, |
| "learning_rate": 1.6520133333884214e-06, |
| "loss": 0.0, |
| "step": 1064000 |
| }, |
| { |
| "epoch": 1.699321468679885, |
| "grad_norm": 0.000113544927444309, |
| "learning_rate": 1.643440395796013e-06, |
| "loss": 0.0, |
| "step": 1064500 |
| }, |
| { |
| "epoch": 1.7001196469178748, |
| "grad_norm": 0.0001603996497578919, |
| "learning_rate": 1.6348884708850348e-06, |
| "loss": 0.0, |
| "step": 1065000 |
| }, |
| { |
| "epoch": 1.700917825155864, |
| "grad_norm": 0.00020040127856191248, |
| "learning_rate": 1.6263575721094708e-06, |
| "loss": 0.0, |
| "step": 1065500 |
| }, |
| { |
| "epoch": 1.7017160033938539, |
| "grad_norm": 0.00025933951837942004, |
| "learning_rate": 1.6178477128902141e-06, |
| "loss": 0.0, |
| "step": 1066000 |
| }, |
| { |
| "epoch": 1.7025141816318434, |
| "grad_norm": 0.00019231809710618109, |
| "learning_rate": 1.6093589066150687e-06, |
| "loss": 0.0, |
| "step": 1066500 |
| }, |
| { |
| "epoch": 1.703312359869833, |
| "grad_norm": 0.00026155367959290743, |
| "learning_rate": 1.6008911666387189e-06, |
| "loss": 0.0, |
| "step": 1067000 |
| }, |
| { |
| "epoch": 1.7041105381078228, |
| "grad_norm": 0.00014199796714819968, |
| "learning_rate": 1.5924445062826948e-06, |
| "loss": 0.0, |
| "step": 1067500 |
| }, |
| { |
| "epoch": 1.7049087163458123, |
| "grad_norm": 285.30670166015625, |
| "learning_rate": 1.584018938835377e-06, |
| "loss": 0.0, |
| "step": 1068000 |
| }, |
| { |
| "epoch": 1.7057068945838019, |
| "grad_norm": 0.00035446975380182266, |
| "learning_rate": 1.575614477551961e-06, |
| "loss": 0.0, |
| "step": 1068500 |
| }, |
| { |
| "epoch": 1.7065050728217916, |
| "grad_norm": 0.00017485932039562613, |
| "learning_rate": 1.5672311356544284e-06, |
| "loss": 0.0, |
| "step": 1069000 |
| }, |
| { |
| "epoch": 1.7073032510597812, |
| "grad_norm": 0.00022567392443306744, |
| "learning_rate": 1.5588689263315426e-06, |
| "loss": 0.0, |
| "step": 1069500 |
| }, |
| { |
| "epoch": 1.7081014292977708, |
| "grad_norm": 0.00024282569938804954, |
| "learning_rate": 1.550527862738822e-06, |
| "loss": 0.0, |
| "step": 1070000 |
| }, |
| { |
| "epoch": 1.7088996075357605, |
| "grad_norm": 0.00038521605893038213, |
| "learning_rate": 1.54220795799852e-06, |
| "loss": 0.0, |
| "step": 1070500 |
| }, |
| { |
| "epoch": 1.7096977857737499, |
| "grad_norm": 0.0002367474662605673, |
| "learning_rate": 1.5339092251995912e-06, |
| "loss": 0.0, |
| "step": 1071000 |
| }, |
| { |
| "epoch": 1.7104959640117396, |
| "grad_norm": 0.0003218221536371857, |
| "learning_rate": 1.5256316773976941e-06, |
| "loss": 0.0, |
| "step": 1071500 |
| }, |
| { |
| "epoch": 1.7112941422497292, |
| "grad_norm": 0.0006056890706531703, |
| "learning_rate": 1.5173753276151586e-06, |
| "loss": 0.0, |
| "step": 1072000 |
| }, |
| { |
| "epoch": 1.7120923204877188, |
| "grad_norm": 0.00013564640539698303, |
| "learning_rate": 1.5091401888409546e-06, |
| "loss": 0.0, |
| "step": 1072500 |
| }, |
| { |
| "epoch": 1.7128904987257085, |
| "grad_norm": 0.00019917692407034338, |
| "learning_rate": 1.5009262740306951e-06, |
| "loss": 0.0, |
| "step": 1073000 |
| }, |
| { |
| "epoch": 1.713688676963698, |
| "grad_norm": 0.00017702036711852998, |
| "learning_rate": 1.4927335961065953e-06, |
| "loss": 0.0, |
| "step": 1073500 |
| }, |
| { |
| "epoch": 1.7144868552016876, |
| "grad_norm": 0.0002473437343724072, |
| "learning_rate": 1.4845621679574666e-06, |
| "loss": 0.0, |
| "step": 1074000 |
| }, |
| { |
| "epoch": 1.7152850334396774, |
| "grad_norm": 0.00022888657986186445, |
| "learning_rate": 1.4764120024386812e-06, |
| "loss": 0.0, |
| "step": 1074500 |
| }, |
| { |
| "epoch": 1.7160832116776668, |
| "grad_norm": 0.00025228134472854435, |
| "learning_rate": 1.4682831123721707e-06, |
| "loss": 0.0, |
| "step": 1075000 |
| }, |
| { |
| "epoch": 1.7168813899156565, |
| "grad_norm": 0.0002196329296566546, |
| "learning_rate": 1.460175510546392e-06, |
| "loss": 0.0, |
| "step": 1075500 |
| }, |
| { |
| "epoch": 1.717679568153646, |
| "grad_norm": 0.00019092884031124413, |
| "learning_rate": 1.4520892097163059e-06, |
| "loss": 0.0, |
| "step": 1076000 |
| }, |
| { |
| "epoch": 1.7184777463916356, |
| "grad_norm": 1346.5693359375, |
| "learning_rate": 1.4440242226033672e-06, |
| "loss": 0.0, |
| "step": 1076500 |
| }, |
| { |
| "epoch": 1.7192759246296254, |
| "grad_norm": 0.00036944085150025785, |
| "learning_rate": 1.4359805618955025e-06, |
| "loss": 0.0, |
| "step": 1077000 |
| }, |
| { |
| "epoch": 1.720074102867615, |
| "grad_norm": 0.0002491538762114942, |
| "learning_rate": 1.4279582402470853e-06, |
| "loss": 0.0, |
| "step": 1077500 |
| }, |
| { |
| "epoch": 1.7208722811056045, |
| "grad_norm": 0.00014698713493999094, |
| "learning_rate": 1.419957270278912e-06, |
| "loss": 0.0, |
| "step": 1078000 |
| }, |
| { |
| "epoch": 1.7216704593435943, |
| "grad_norm": 0.0001589061866980046, |
| "learning_rate": 1.4119776645781956e-06, |
| "loss": 0.0, |
| "step": 1078500 |
| }, |
| { |
| "epoch": 1.7224686375815836, |
| "grad_norm": 0.00016320293070748448, |
| "learning_rate": 1.4040194356985408e-06, |
| "loss": 0.0, |
| "step": 1079000 |
| }, |
| { |
| "epoch": 1.7232668158195734, |
| "grad_norm": 0.00017447932623326778, |
| "learning_rate": 1.3960825961599112e-06, |
| "loss": 0.0, |
| "step": 1079500 |
| }, |
| { |
| "epoch": 1.724064994057563, |
| "grad_norm": 0.00021619001927319914, |
| "learning_rate": 1.38816715844863e-06, |
| "loss": 0.0, |
| "step": 1080000 |
| }, |
| { |
| "epoch": 1.7248631722955525, |
| "grad_norm": 0.0002612106909509748, |
| "learning_rate": 1.380273135017348e-06, |
| "loss": 0.0, |
| "step": 1080500 |
| }, |
| { |
| "epoch": 1.7256613505335423, |
| "grad_norm": 0.0005082807037979364, |
| "learning_rate": 1.3724005382850296e-06, |
| "loss": 0.0, |
| "step": 1081000 |
| }, |
| { |
| "epoch": 1.7264595287715319, |
| "grad_norm": 0.00015604299551341683, |
| "learning_rate": 1.3645493806369258e-06, |
| "loss": 0.0, |
| "step": 1081500 |
| }, |
| { |
| "epoch": 1.7272577070095214, |
| "grad_norm": 0.0005650034872815013, |
| "learning_rate": 1.3567196744245531e-06, |
| "loss": 0.0, |
| "step": 1082000 |
| }, |
| { |
| "epoch": 1.7280558852475112, |
| "grad_norm": 0.00015410668856929988, |
| "learning_rate": 1.3489114319657014e-06, |
| "loss": 0.0, |
| "step": 1082500 |
| }, |
| { |
| "epoch": 1.7288540634855007, |
| "grad_norm": 0.00018081202870234847, |
| "learning_rate": 1.3411246655443715e-06, |
| "loss": 0.0, |
| "step": 1083000 |
| }, |
| { |
| "epoch": 1.7296522417234903, |
| "grad_norm": 0.0002297761384397745, |
| "learning_rate": 1.3333593874107908e-06, |
| "loss": 0.0, |
| "step": 1083500 |
| }, |
| { |
| "epoch": 1.73045041996148, |
| "grad_norm": 0.00018270351574756205, |
| "learning_rate": 1.3256156097813754e-06, |
| "loss": 0.0, |
| "step": 1084000 |
| }, |
| { |
| "epoch": 1.7312485981994694, |
| "grad_norm": 0.001780197722837329, |
| "learning_rate": 1.3178933448387237e-06, |
| "loss": 0.0, |
| "step": 1084500 |
| }, |
| { |
| "epoch": 1.7320467764374592, |
| "grad_norm": 0.00021518795983865857, |
| "learning_rate": 1.3101926047315826e-06, |
| "loss": 0.0, |
| "step": 1085000 |
| }, |
| { |
| "epoch": 1.7328449546754487, |
| "grad_norm": 0.00019899863400496542, |
| "learning_rate": 1.3025134015748296e-06, |
| "loss": 0.0, |
| "step": 1085500 |
| }, |
| { |
| "epoch": 1.7336431329134383, |
| "grad_norm": 0.00016011096886359155, |
| "learning_rate": 1.294855747449481e-06, |
| "loss": 0.0, |
| "step": 1086000 |
| }, |
| { |
| "epoch": 1.734441311151428, |
| "grad_norm": 0.00023628213966730982, |
| "learning_rate": 1.2872196544026332e-06, |
| "loss": 0.0, |
| "step": 1086500 |
| }, |
| { |
| "epoch": 1.7352394893894176, |
| "grad_norm": 0.0001332947431365028, |
| "learning_rate": 1.2796051344474718e-06, |
| "loss": 0.0, |
| "step": 1087000 |
| }, |
| { |
| "epoch": 1.7360376676274072, |
| "grad_norm": 0.0002323123480891809, |
| "learning_rate": 1.2720121995632362e-06, |
| "loss": 0.0, |
| "step": 1087500 |
| }, |
| { |
| "epoch": 1.736835845865397, |
| "grad_norm": 0.00017582009604666382, |
| "learning_rate": 1.2644408616952142e-06, |
| "loss": 0.0, |
| "step": 1088000 |
| }, |
| { |
| "epoch": 1.7376340241033863, |
| "grad_norm": 0.00015031747170723975, |
| "learning_rate": 1.2568911327547178e-06, |
| "loss": 0.0, |
| "step": 1088500 |
| }, |
| { |
| "epoch": 1.738432202341376, |
| "grad_norm": 0.00016457086894661188, |
| "learning_rate": 1.2493630246190546e-06, |
| "loss": 0.0, |
| "step": 1089000 |
| }, |
| { |
| "epoch": 1.7392303805793656, |
| "grad_norm": 0.0003954498388338834, |
| "learning_rate": 1.2418565491315325e-06, |
| "loss": 0.0, |
| "step": 1089500 |
| }, |
| { |
| "epoch": 1.7400285588173552, |
| "grad_norm": 0.0008840158116072416, |
| "learning_rate": 1.234371718101412e-06, |
| "loss": 0.0, |
| "step": 1090000 |
| }, |
| { |
| "epoch": 1.740826737055345, |
| "grad_norm": 0.00028518703766167164, |
| "learning_rate": 1.2269085433039135e-06, |
| "loss": 0.0, |
| "step": 1090500 |
| }, |
| { |
| "epoch": 1.7416249152933345, |
| "grad_norm": 0.0001815830619307235, |
| "learning_rate": 1.2194670364801785e-06, |
| "loss": 0.0, |
| "step": 1091000 |
| }, |
| { |
| "epoch": 1.742423093531324, |
| "grad_norm": 0.00017911636678036302, |
| "learning_rate": 1.2120472093372642e-06, |
| "loss": 0.0, |
| "step": 1091500 |
| }, |
| { |
| "epoch": 1.7432212717693139, |
| "grad_norm": 0.0002147218183381483, |
| "learning_rate": 1.204649073548128e-06, |
| "loss": 0.0, |
| "step": 1092000 |
| }, |
| { |
| "epoch": 1.7440194500073032, |
| "grad_norm": 0.00016766555199865252, |
| "learning_rate": 1.1972726407515848e-06, |
| "loss": 0.0, |
| "step": 1092500 |
| }, |
| { |
| "epoch": 1.744817628245293, |
| "grad_norm": 0.00023504404816776514, |
| "learning_rate": 1.1899179225523305e-06, |
| "loss": 0.0, |
| "step": 1093000 |
| }, |
| { |
| "epoch": 1.7456158064832825, |
| "grad_norm": 0.0001661064598010853, |
| "learning_rate": 1.182584930520874e-06, |
| "loss": 0.0, |
| "step": 1093500 |
| }, |
| { |
| "epoch": 1.746413984721272, |
| "grad_norm": 0.00013320970174390823, |
| "learning_rate": 1.175273676193566e-06, |
| "loss": 0.0, |
| "step": 1094000 |
| }, |
| { |
| "epoch": 1.7472121629592618, |
| "grad_norm": 0.00019263781723566353, |
| "learning_rate": 1.167984171072541e-06, |
| "loss": 0.0, |
| "step": 1094500 |
| }, |
| { |
| "epoch": 1.7480103411972514, |
| "grad_norm": 0.00018430198542773724, |
| "learning_rate": 1.1607164266257297e-06, |
| "loss": 0.0, |
| "step": 1095000 |
| }, |
| { |
| "epoch": 1.748808519435241, |
| "grad_norm": 0.000189875194337219, |
| "learning_rate": 1.1534704542868268e-06, |
| "loss": 0.0, |
| "step": 1095500 |
| }, |
| { |
| "epoch": 1.7496066976732307, |
| "grad_norm": 0.00014763257058802992, |
| "learning_rate": 1.1462462654552685e-06, |
| "loss": 0.0, |
| "step": 1096000 |
| }, |
| { |
| "epoch": 1.75040487591122, |
| "grad_norm": 0.0001301927404711023, |
| "learning_rate": 1.139043871496227e-06, |
| "loss": 0.0, |
| "step": 1096500 |
| }, |
| { |
| "epoch": 1.7512030541492098, |
| "grad_norm": 0.000194509033462964, |
| "learning_rate": 1.1318632837405885e-06, |
| "loss": 0.0, |
| "step": 1097000 |
| }, |
| { |
| "epoch": 1.7520012323871994, |
| "grad_norm": 0.00022325903410091996, |
| "learning_rate": 1.1247045134849248e-06, |
| "loss": 0.0, |
| "step": 1097500 |
| }, |
| { |
| "epoch": 1.752799410625189, |
| "grad_norm": 0.00014271271356847137, |
| "learning_rate": 1.1175675719914924e-06, |
| "loss": 0.0, |
| "step": 1098000 |
| }, |
| { |
| "epoch": 1.7535975888631787, |
| "grad_norm": 0.00030372265609912574, |
| "learning_rate": 1.1104524704882014e-06, |
| "loss": 0.0, |
| "step": 1098500 |
| }, |
| { |
| "epoch": 1.7543957671011683, |
| "grad_norm": 0.00020143936853855848, |
| "learning_rate": 1.1033592201686093e-06, |
| "loss": 0.0, |
| "step": 1099000 |
| }, |
| { |
| "epoch": 1.7551939453391578, |
| "grad_norm": 0.00019451904518064111, |
| "learning_rate": 1.0962878321918884e-06, |
| "loss": 0.0, |
| "step": 1099500 |
| }, |
| { |
| "epoch": 1.7559921235771476, |
| "grad_norm": 0.0001757531426846981, |
| "learning_rate": 1.0892383176828213e-06, |
| "loss": 0.0, |
| "step": 1100000 |
| }, |
| { |
| "epoch": 1.7567903018151372, |
| "grad_norm": 0.0002402666286798194, |
| "learning_rate": 1.0822106877317834e-06, |
| "loss": 0.0, |
| "step": 1100500 |
| }, |
| { |
| "epoch": 1.7575884800531267, |
| "grad_norm": 0.0002771165454760194, |
| "learning_rate": 1.0752049533947122e-06, |
| "loss": 0.0, |
| "step": 1101000 |
| }, |
| { |
| "epoch": 1.7583866582911165, |
| "grad_norm": 0.00023865242837928236, |
| "learning_rate": 1.0682211256931051e-06, |
| "loss": 0.0, |
| "step": 1101500 |
| }, |
| { |
| "epoch": 1.7591848365291058, |
| "grad_norm": 0.000238187174545601, |
| "learning_rate": 1.0612592156139933e-06, |
| "loss": 0.0, |
| "step": 1102000 |
| }, |
| { |
| "epoch": 1.7599830147670956, |
| "grad_norm": 0.00048370350850746036, |
| "learning_rate": 1.0543192341099306e-06, |
| "loss": 0.0, |
| "step": 1102500 |
| }, |
| { |
| "epoch": 1.7607811930050852, |
| "grad_norm": 0.00022986202384345233, |
| "learning_rate": 1.0474011920989667e-06, |
| "loss": 0.0, |
| "step": 1103000 |
| }, |
| { |
| "epoch": 1.7615793712430747, |
| "grad_norm": 0.0001623473799554631, |
| "learning_rate": 1.0405051004646377e-06, |
| "loss": 0.0, |
| "step": 1103500 |
| }, |
| { |
| "epoch": 1.7623775494810645, |
| "grad_norm": 0.0001603475830052048, |
| "learning_rate": 1.0336309700559531e-06, |
| "loss": 0.0, |
| "step": 1104000 |
| }, |
| { |
| "epoch": 1.763175727719054, |
| "grad_norm": 0.00022371606610249728, |
| "learning_rate": 1.0267788116873628e-06, |
| "loss": 0.0, |
| "step": 1104500 |
| }, |
| { |
| "epoch": 1.7639739059570436, |
| "grad_norm": 0.00020552480418700725, |
| "learning_rate": 1.0199486361387567e-06, |
| "loss": 0.0, |
| "step": 1105000 |
| }, |
| { |
| "epoch": 1.7647720841950334, |
| "grad_norm": 0.0001210166301461868, |
| "learning_rate": 1.0131404541554412e-06, |
| "loss": 0.0, |
| "step": 1105500 |
| }, |
| { |
| "epoch": 1.7655702624330227, |
| "grad_norm": 0.00022290610650088638, |
| "learning_rate": 1.0063542764481204e-06, |
| "loss": 0.0, |
| "step": 1106000 |
| }, |
| { |
| "epoch": 1.7663684406710125, |
| "grad_norm": 0.0002481382107362151, |
| "learning_rate": 9.995901136928776e-07, |
| "loss": 0.0, |
| "step": 1106500 |
| }, |
| { |
| "epoch": 1.767166618909002, |
| "grad_norm": 0.0002452095504850149, |
| "learning_rate": 9.928479765311689e-07, |
| "loss": 0.0, |
| "step": 1107000 |
| }, |
| { |
| "epoch": 1.7679647971469916, |
| "grad_norm": 0.00017975401715375483, |
| "learning_rate": 9.86127875569796e-07, |
| "loss": 0.0, |
| "step": 1107500 |
| }, |
| { |
| "epoch": 1.7687629753849814, |
| "grad_norm": 0.0002084321022266522, |
| "learning_rate": 9.794298213808912e-07, |
| "loss": 0.0, |
| "step": 1108000 |
| }, |
| { |
| "epoch": 1.769561153622971, |
| "grad_norm": 0.000212875209399499, |
| "learning_rate": 9.727538245019047e-07, |
| "loss": 0.0, |
| "step": 1108500 |
| }, |
| { |
| "epoch": 1.7703593318609605, |
| "grad_norm": 0.00023228510690387338, |
| "learning_rate": 9.66099895435587e-07, |
| "loss": 0.0, |
| "step": 1109000 |
| }, |
| { |
| "epoch": 1.7711575100989503, |
| "grad_norm": 0.0001471398863941431, |
| "learning_rate": 9.594680446499716e-07, |
| "loss": 0.0, |
| "step": 1109500 |
| }, |
| { |
| "epoch": 1.7719556883369396, |
| "grad_norm": 0.00029612609068863094, |
| "learning_rate": 9.528582825783505e-07, |
| "loss": 0.0, |
| "step": 1110000 |
| }, |
| { |
| "epoch": 1.7727538665749294, |
| "grad_norm": 0.00016415739082731307, |
| "learning_rate": 9.462706196192777e-07, |
| "loss": 0.0, |
| "step": 1110500 |
| }, |
| { |
| "epoch": 1.773552044812919, |
| "grad_norm": 0.00027014673105441034, |
| "learning_rate": 9.397050661365348e-07, |
| "loss": 0.0, |
| "step": 1111000 |
| }, |
| { |
| "epoch": 1.7743502230509085, |
| "grad_norm": 0.0001431436976417899, |
| "learning_rate": 9.331616324591142e-07, |
| "loss": 0.0, |
| "step": 1111500 |
| }, |
| { |
| "epoch": 1.7751484012888983, |
| "grad_norm": 0.000254760350799188, |
| "learning_rate": 9.266403288812197e-07, |
| "loss": 0.0, |
| "step": 1112000 |
| }, |
| { |
| "epoch": 1.7759465795268878, |
| "grad_norm": 0.0002271405392093584, |
| "learning_rate": 9.201411656622333e-07, |
| "loss": 0.0, |
| "step": 1112500 |
| }, |
| { |
| "epoch": 1.7767447577648774, |
| "grad_norm": 0.0002613988472148776, |
| "learning_rate": 9.136641530267126e-07, |
| "loss": 0.0, |
| "step": 1113000 |
| }, |
| { |
| "epoch": 1.7775429360028672, |
| "grad_norm": 0.000750661245547235, |
| "learning_rate": 9.072093011643567e-07, |
| "loss": 0.0, |
| "step": 1113500 |
| }, |
| { |
| "epoch": 1.7783411142408567, |
| "grad_norm": 0.00017439897055737674, |
| "learning_rate": 9.007766202300094e-07, |
| "loss": 0.0, |
| "step": 1114000 |
| }, |
| { |
| "epoch": 1.7791392924788463, |
| "grad_norm": 0.0002634642878547311, |
| "learning_rate": 8.943661203436337e-07, |
| "loss": 0.0, |
| "step": 1114500 |
| }, |
| { |
| "epoch": 1.779937470716836, |
| "grad_norm": 0.00016087290714494884, |
| "learning_rate": 8.879778115902942e-07, |
| "loss": 0.0, |
| "step": 1115000 |
| }, |
| { |
| "epoch": 1.7807356489548254, |
| "grad_norm": 0.00019531312864273787, |
| "learning_rate": 8.816117040201449e-07, |
| "loss": 0.0, |
| "step": 1115500 |
| }, |
| { |
| "epoch": 1.7815338271928152, |
| "grad_norm": 0.00020467853755690157, |
| "learning_rate": 8.752678076484194e-07, |
| "loss": 0.0, |
| "step": 1116000 |
| }, |
| { |
| "epoch": 1.7823320054308047, |
| "grad_norm": 0.00017259104060940444, |
| "learning_rate": 8.689461324553976e-07, |
| "loss": 0.0, |
| "step": 1116500 |
| }, |
| { |
| "epoch": 1.7831301836687943, |
| "grad_norm": 0.0002617633144836873, |
| "learning_rate": 8.626466883864093e-07, |
| "loss": 0.0, |
| "step": 1117000 |
| }, |
| { |
| "epoch": 1.783928361906784, |
| "grad_norm": 0.00016495882300660014, |
| "learning_rate": 8.563694853518017e-07, |
| "loss": 0.0, |
| "step": 1117500 |
| }, |
| { |
| "epoch": 1.7847265401447736, |
| "grad_norm": 0.00015124342462513596, |
| "learning_rate": 8.501145332269439e-07, |
| "loss": 0.0, |
| "step": 1118000 |
| }, |
| { |
| "epoch": 1.7855247183827632, |
| "grad_norm": 0.0003004848840646446, |
| "learning_rate": 8.438818418521893e-07, |
| "loss": 0.0, |
| "step": 1118500 |
| }, |
| { |
| "epoch": 1.786322896620753, |
| "grad_norm": 0.00018394803919363767, |
| "learning_rate": 8.376714210328728e-07, |
| "loss": 0.0, |
| "step": 1119000 |
| }, |
| { |
| "epoch": 1.7871210748587423, |
| "grad_norm": 0.00023628614144399762, |
| "learning_rate": 8.314832805393008e-07, |
| "loss": 0.0, |
| "step": 1119500 |
| }, |
| { |
| "epoch": 1.787919253096732, |
| "grad_norm": 0.00016657341620884836, |
| "learning_rate": 8.253174301067141e-07, |
| "loss": 0.0, |
| "step": 1120000 |
| }, |
| { |
| "epoch": 1.7887174313347216, |
| "grad_norm": 0.00014997956168372184, |
| "learning_rate": 8.191738794352999e-07, |
| "loss": 0.0, |
| "step": 1120500 |
| }, |
| { |
| "epoch": 1.7895156095727112, |
| "grad_norm": 0.0047565544955432415, |
| "learning_rate": 8.130526381901488e-07, |
| "loss": 0.0, |
| "step": 1121000 |
| }, |
| { |
| "epoch": 1.790313787810701, |
| "grad_norm": 0.00021316143102012575, |
| "learning_rate": 8.069537160012741e-07, |
| "loss": 0.0, |
| "step": 1121500 |
| }, |
| { |
| "epoch": 1.7911119660486905, |
| "grad_norm": 0.00012046356278005987, |
| "learning_rate": 8.008771224635575e-07, |
| "loss": 0.0, |
| "step": 1122000 |
| }, |
| { |
| "epoch": 1.79191014428668, |
| "grad_norm": 0.010202116332948208, |
| "learning_rate": 7.948228671367653e-07, |
| "loss": 0.0, |
| "step": 1122500 |
| }, |
| { |
| "epoch": 1.7927083225246698, |
| "grad_norm": 0.00019842854817397892, |
| "learning_rate": 7.887909595455101e-07, |
| "loss": 0.0, |
| "step": 1123000 |
| }, |
| { |
| "epoch": 1.7935065007626592, |
| "grad_norm": 0.0003019568102899939, |
| "learning_rate": 7.827814091792546e-07, |
| "loss": 0.0, |
| "step": 1123500 |
| }, |
| { |
| "epoch": 1.794304679000649, |
| "grad_norm": 0.00022996992629487067, |
| "learning_rate": 7.767942254922927e-07, |
| "loss": 0.0, |
| "step": 1124000 |
| }, |
| { |
| "epoch": 1.7951028572386385, |
| "grad_norm": 0.00019457354210317135, |
| "learning_rate": 7.708294179037184e-07, |
| "loss": 0.0, |
| "step": 1124500 |
| }, |
| { |
| "epoch": 1.795901035476628, |
| "grad_norm": 0.00023033515026327223, |
| "learning_rate": 7.648869957974353e-07, |
| "loss": 0.0, |
| "step": 1125000 |
| }, |
| { |
| "epoch": 1.7966992137146178, |
| "grad_norm": 0.00018613325664773583, |
| "learning_rate": 7.589669685221251e-07, |
| "loss": 0.0, |
| "step": 1125500 |
| }, |
| { |
| "epoch": 1.7974973919526074, |
| "grad_norm": 0.00013318394485395402, |
| "learning_rate": 7.53069345391233e-07, |
| "loss": 0.0, |
| "step": 1126000 |
| }, |
| { |
| "epoch": 1.798295570190597, |
| "grad_norm": 0.00020175697864033282, |
| "learning_rate": 7.471941356829653e-07, |
| "loss": 0.0, |
| "step": 1126500 |
| }, |
| { |
| "epoch": 1.7990937484285867, |
| "grad_norm": 0.00015706892008893192, |
| "learning_rate": 7.413413486402637e-07, |
| "loss": 0.0, |
| "step": 1127000 |
| }, |
| { |
| "epoch": 1.799891926666576, |
| "grad_norm": 0.0005745620583184063, |
| "learning_rate": 7.35510993470796e-07, |
| "loss": 0.0, |
| "step": 1127500 |
| }, |
| { |
| "epoch": 1.8006901049045658, |
| "grad_norm": 0.00015678079216741025, |
| "learning_rate": 7.297030793469367e-07, |
| "loss": 0.0, |
| "step": 1128000 |
| }, |
| { |
| "epoch": 1.8014882831425554, |
| "grad_norm": 0.0001835569564718753, |
| "learning_rate": 7.239176154057619e-07, |
| "loss": 0.0, |
| "step": 1128500 |
| }, |
| { |
| "epoch": 1.802286461380545, |
| "grad_norm": 0.00017835032485891134, |
| "learning_rate": 7.181546107490228e-07, |
| "loss": 0.0, |
| "step": 1129000 |
| }, |
| { |
| "epoch": 1.8030846396185347, |
| "grad_norm": 0.00019548686395864934, |
| "learning_rate": 7.124140744431368e-07, |
| "loss": 0.0, |
| "step": 1129500 |
| }, |
| { |
| "epoch": 1.8038828178565243, |
| "grad_norm": 0.00024722403031773865, |
| "learning_rate": 7.066960155191781e-07, |
| "loss": 0.0, |
| "step": 1130000 |
| }, |
| { |
| "epoch": 1.8046809960945138, |
| "grad_norm": 0.00014821036893408746, |
| "learning_rate": 7.010004429728611e-07, |
| "loss": 0.0, |
| "step": 1130500 |
| }, |
| { |
| "epoch": 1.8054791743325036, |
| "grad_norm": 0.0010676577221602201, |
| "learning_rate": 6.953273657645182e-07, |
| "loss": 0.0, |
| "step": 1131000 |
| }, |
| { |
| "epoch": 1.8062773525704932, |
| "grad_norm": 0.00027099967701360583, |
| "learning_rate": 6.89676792819095e-07, |
| "loss": 0.0, |
| "step": 1131500 |
| }, |
| { |
| "epoch": 1.8070755308084827, |
| "grad_norm": 0.0002371660084463656, |
| "learning_rate": 6.840487330261308e-07, |
| "loss": 0.0, |
| "step": 1132000 |
| }, |
| { |
| "epoch": 1.8078737090464725, |
| "grad_norm": 0.0002728116814978421, |
| "learning_rate": 6.784431952397546e-07, |
| "loss": 0.0, |
| "step": 1132500 |
| }, |
| { |
| "epoch": 1.8086718872844618, |
| "grad_norm": 0.00018127913062926382, |
| "learning_rate": 6.728601882786523e-07, |
| "loss": 0.0, |
| "step": 1133000 |
| }, |
| { |
| "epoch": 1.8094700655224516, |
| "grad_norm": 0.00026493624318391085, |
| "learning_rate": 6.672997209260712e-07, |
| "loss": 0.0, |
| "step": 1133500 |
| }, |
| { |
| "epoch": 1.8102682437604412, |
| "grad_norm": 1832.54248046875, |
| "learning_rate": 6.617618019298005e-07, |
| "loss": 0.0, |
| "step": 1134000 |
| }, |
| { |
| "epoch": 1.8110664219984307, |
| "grad_norm": 0.0001290196378249675, |
| "learning_rate": 6.562464400021512e-07, |
| "loss": 0.0, |
| "step": 1134500 |
| }, |
| { |
| "epoch": 1.8118646002364205, |
| "grad_norm": 0.00015263666864484549, |
| "learning_rate": 6.507536438199474e-07, |
| "loss": 0.0, |
| "step": 1135000 |
| }, |
| { |
| "epoch": 1.81266277847441, |
| "grad_norm": 0.00016473012510687113, |
| "learning_rate": 6.452834220245168e-07, |
| "loss": 0.0, |
| "step": 1135500 |
| }, |
| { |
| "epoch": 1.8134609567123996, |
| "grad_norm": 0.0001999867963604629, |
| "learning_rate": 6.398357832216705e-07, |
| "loss": 0.0, |
| "step": 1136000 |
| }, |
| { |
| "epoch": 1.8142591349503894, |
| "grad_norm": 0.016133194789290428, |
| "learning_rate": 6.344107359816898e-07, |
| "loss": 0.0, |
| "step": 1136500 |
| }, |
| { |
| "epoch": 1.8150573131883787, |
| "grad_norm": 0.00015022288425825536, |
| "learning_rate": 6.290082888393172e-07, |
| "loss": 0.0, |
| "step": 1137000 |
| }, |
| { |
| "epoch": 1.8158554914263685, |
| "grad_norm": 0.00015219298074953258, |
| "learning_rate": 6.236284502937428e-07, |
| "loss": 0.0, |
| "step": 1137500 |
| }, |
| { |
| "epoch": 1.816653669664358, |
| "grad_norm": 0.00019694263755809516, |
| "learning_rate": 6.182712288085828e-07, |
| "loss": 0.0, |
| "step": 1138000 |
| }, |
| { |
| "epoch": 1.8174518479023476, |
| "grad_norm": 0.00015368903405033052, |
| "learning_rate": 6.129366328118758e-07, |
| "loss": 0.0, |
| "step": 1138500 |
| }, |
| { |
| "epoch": 1.8182500261403374, |
| "grad_norm": 0.0006411715294234455, |
| "learning_rate": 6.076246706960631e-07, |
| "loss": 0.0, |
| "step": 1139000 |
| }, |
| { |
| "epoch": 1.819048204378327, |
| "grad_norm": 0.00017495772044640034, |
| "learning_rate": 6.023353508179835e-07, |
| "loss": 0.0, |
| "step": 1139500 |
| }, |
| { |
| "epoch": 1.8198463826163165, |
| "grad_norm": 0.00028414788539521396, |
| "learning_rate": 5.970686814988474e-07, |
| "loss": 0.0, |
| "step": 1140000 |
| }, |
| { |
| "epoch": 1.8206445608543063, |
| "grad_norm": 0.00014593903324566782, |
| "learning_rate": 5.91824671024237e-07, |
| "loss": 0.0, |
| "step": 1140500 |
| }, |
| { |
| "epoch": 1.8214427390922956, |
| "grad_norm": 0.00016339162539225072, |
| "learning_rate": 5.866033276440863e-07, |
| "loss": 0.0, |
| "step": 1141000 |
| }, |
| { |
| "epoch": 1.8222409173302854, |
| "grad_norm": 0.00025076873134821653, |
| "learning_rate": 5.814046595726663e-07, |
| "loss": 0.0, |
| "step": 1141500 |
| }, |
| { |
| "epoch": 1.823039095568275, |
| "grad_norm": 0.00019486478413455188, |
| "learning_rate": 5.762286749885781e-07, |
| "loss": 0.0, |
| "step": 1142000 |
| }, |
| { |
| "epoch": 1.8238372738062645, |
| "grad_norm": 0.00026214728131890297, |
| "learning_rate": 5.710753820347331e-07, |
| "loss": 0.0, |
| "step": 1142500 |
| }, |
| { |
| "epoch": 1.8246354520442543, |
| "grad_norm": 0.000143070996273309, |
| "learning_rate": 5.659447888183511e-07, |
| "loss": 0.0, |
| "step": 1143000 |
| }, |
| { |
| "epoch": 1.8254336302822438, |
| "grad_norm": 0.00029296561842784286, |
| "learning_rate": 5.608369034109306e-07, |
| "loss": 0.0, |
| "step": 1143500 |
| }, |
| { |
| "epoch": 1.8262318085202334, |
| "grad_norm": 0.0001531924499431625, |
| "learning_rate": 5.557517338482537e-07, |
| "loss": 0.0, |
| "step": 1144000 |
| }, |
| { |
| "epoch": 1.8270299867582231, |
| "grad_norm": 0.000254272687016055, |
| "learning_rate": 5.50689288130366e-07, |
| "loss": 0.0, |
| "step": 1144500 |
| }, |
| { |
| "epoch": 1.8278281649962125, |
| "grad_norm": 0.00014179408026393503, |
| "learning_rate": 5.456495742215551e-07, |
| "loss": 0.0, |
| "step": 1145000 |
| }, |
| { |
| "epoch": 1.8286263432342023, |
| "grad_norm": 0.00011709563841577619, |
| "learning_rate": 5.406326000503553e-07, |
| "loss": 0.0, |
| "step": 1145500 |
| }, |
| { |
| "epoch": 1.829424521472192, |
| "grad_norm": 0.0021168107632547617, |
| "learning_rate": 5.356383735095249e-07, |
| "loss": 0.0, |
| "step": 1146000 |
| }, |
| { |
| "epoch": 1.8302226997101814, |
| "grad_norm": 0.00023661217710468918, |
| "learning_rate": 5.30666902456034e-07, |
| "loss": 0.0, |
| "step": 1146500 |
| }, |
| { |
| "epoch": 1.8310208779481711, |
| "grad_norm": 1002.8739624023438, |
| "learning_rate": 5.257181947110512e-07, |
| "loss": 0.0, |
| "step": 1147000 |
| }, |
| { |
| "epoch": 1.8318190561861607, |
| "grad_norm": 0.0001538294309284538, |
| "learning_rate": 5.207922580599356e-07, |
| "loss": 0.0, |
| "step": 1147500 |
| }, |
| { |
| "epoch": 1.8326172344241503, |
| "grad_norm": 0.00015853659715503454, |
| "learning_rate": 5.158891002522282e-07, |
| "loss": 0.0, |
| "step": 1148000 |
| }, |
| { |
| "epoch": 1.83341541266214, |
| "grad_norm": 0.0006100303144194186, |
| "learning_rate": 5.110087290016252e-07, |
| "loss": 0.0, |
| "step": 1148500 |
| }, |
| { |
| "epoch": 1.8342135909001296, |
| "grad_norm": 0.00016067329852376133, |
| "learning_rate": 5.061511519859785e-07, |
| "loss": 0.0, |
| "step": 1149000 |
| }, |
| { |
| "epoch": 1.8350117691381191, |
| "grad_norm": 0.00024289886641781777, |
| "learning_rate": 5.013163768472783e-07, |
| "loss": 0.0, |
| "step": 1149500 |
| }, |
| { |
| "epoch": 1.835809947376109, |
| "grad_norm": 0.00019153668836224824, |
| "learning_rate": 4.965044111916489e-07, |
| "loss": 0.0, |
| "step": 1150000 |
| }, |
| { |
| "epoch": 1.8366081256140983, |
| "grad_norm": 0.00018519822333473712, |
| "learning_rate": 4.917152625893212e-07, |
| "loss": 0.0, |
| "step": 1150500 |
| }, |
| { |
| "epoch": 1.837406303852088, |
| "grad_norm": 0.0001868938561528921, |
| "learning_rate": 4.869489385746367e-07, |
| "loss": 0.0, |
| "step": 1151000 |
| }, |
| { |
| "epoch": 1.8382044820900776, |
| "grad_norm": 0.000234300852753222, |
| "learning_rate": 4.82205446646029e-07, |
| "loss": 0.0, |
| "step": 1151500 |
| }, |
| { |
| "epoch": 1.8390026603280671, |
| "grad_norm": 0.00017578886763658375, |
| "learning_rate": 4.774847942660055e-07, |
| "loss": 0.0, |
| "step": 1152000 |
| }, |
| { |
| "epoch": 1.839800838566057, |
| "grad_norm": 0.00021545674826484174, |
| "learning_rate": 4.727869888611519e-07, |
| "loss": 0.0, |
| "step": 1152500 |
| }, |
| { |
| "epoch": 1.8405990168040465, |
| "grad_norm": 0.0005743197398260236, |
| "learning_rate": 4.681120378220982e-07, |
| "loss": 0.0, |
| "step": 1153000 |
| }, |
| { |
| "epoch": 1.841397195042036, |
| "grad_norm": 0.00012018709094263613, |
| "learning_rate": 4.634599485035346e-07, |
| "loss": 0.0, |
| "step": 1153500 |
| }, |
| { |
| "epoch": 1.8421953732800258, |
| "grad_norm": 0.00021630495029967278, |
| "learning_rate": 4.588307282241749e-07, |
| "loss": 0.0, |
| "step": 1154000 |
| }, |
| { |
| "epoch": 1.8429935515180151, |
| "grad_norm": 0.0001780359452823177, |
| "learning_rate": 4.542243842667537e-07, |
| "loss": 0.0, |
| "step": 1154500 |
| }, |
| { |
| "epoch": 1.843791729756005, |
| "grad_norm": 0.00031716233934275806, |
| "learning_rate": 4.496409238780258e-07, |
| "loss": 0.0, |
| "step": 1155000 |
| }, |
| { |
| "epoch": 1.8445899079939945, |
| "grad_norm": 0.0009638772462494671, |
| "learning_rate": 4.450803542687365e-07, |
| "loss": 0.0, |
| "step": 1155500 |
| }, |
| { |
| "epoch": 1.845388086231984, |
| "grad_norm": 0.00016452405543532223, |
| "learning_rate": 4.4054268261362496e-07, |
| "loss": 0.0, |
| "step": 1156000 |
| }, |
| { |
| "epoch": 1.8461862644699738, |
| "grad_norm": 0.0001442407228751108, |
| "learning_rate": 4.360279160514008e-07, |
| "loss": 0.0, |
| "step": 1156500 |
| }, |
| { |
| "epoch": 1.8469844427079634, |
| "grad_norm": 0.00014954974176362157, |
| "learning_rate": 4.315360616847458e-07, |
| "loss": 0.0, |
| "step": 1157000 |
| }, |
| { |
| "epoch": 1.847782620945953, |
| "grad_norm": 0.00043269319576211274, |
| "learning_rate": 4.2706712658029224e-07, |
| "loss": 0.0, |
| "step": 1157500 |
| }, |
| { |
| "epoch": 1.8485807991839427, |
| "grad_norm": 0.00015773314225953072, |
| "learning_rate": 4.226211177686162e-07, |
| "loss": 0.0, |
| "step": 1158000 |
| }, |
| { |
| "epoch": 1.849378977421932, |
| "grad_norm": 0.0002262149064335972, |
| "learning_rate": 4.1819804224422444e-07, |
| "loss": 0.0, |
| "step": 1158500 |
| }, |
| { |
| "epoch": 1.8501771556599218, |
| "grad_norm": 0.00019894444267265499, |
| "learning_rate": 4.137979069655473e-07, |
| "loss": 0.0, |
| "step": 1159000 |
| }, |
| { |
| "epoch": 1.8509753338979114, |
| "grad_norm": 0.0001444010267732665, |
| "learning_rate": 4.0942071885492595e-07, |
| "loss": 0.0, |
| "step": 1159500 |
| }, |
| { |
| "epoch": 1.851773512135901, |
| "grad_norm": 0.00019132070883642882, |
| "learning_rate": 4.0506648479859523e-07, |
| "loss": 0.0, |
| "step": 1160000 |
| }, |
| { |
| "epoch": 1.8525716903738907, |
| "grad_norm": 0.00022376253036782146, |
| "learning_rate": 4.007352116466889e-07, |
| "loss": 0.0, |
| "step": 1160500 |
| }, |
| { |
| "epoch": 1.8533698686118802, |
| "grad_norm": 0.00021582655608654022, |
| "learning_rate": 3.964269062132081e-07, |
| "loss": 0.0, |
| "step": 1161000 |
| }, |
| { |
| "epoch": 1.8541680468498698, |
| "grad_norm": 0.00011844315304188058, |
| "learning_rate": 3.921415752760227e-07, |
| "loss": 0.0, |
| "step": 1161500 |
| }, |
| { |
| "epoch": 1.8549662250878596, |
| "grad_norm": 0.000183063093572855, |
| "learning_rate": 3.878792255768615e-07, |
| "loss": 0.0, |
| "step": 1162000 |
| }, |
| { |
| "epoch": 1.8557644033258491, |
| "grad_norm": 0.0001891221763798967, |
| "learning_rate": 3.8363986382130066e-07, |
| "loss": 0.0, |
| "step": 1162500 |
| }, |
| { |
| "epoch": 1.8565625815638387, |
| "grad_norm": 0.0009782819543033838, |
| "learning_rate": 3.794234966787502e-07, |
| "loss": 0.0, |
| "step": 1163000 |
| }, |
| { |
| "epoch": 1.8573607598018285, |
| "grad_norm": 0.00015358542441390455, |
| "learning_rate": 3.7523013078243906e-07, |
| "loss": 0.0, |
| "step": 1163500 |
| }, |
| { |
| "epoch": 1.8581589380398178, |
| "grad_norm": 0.002302468754351139, |
| "learning_rate": 3.710597727294185e-07, |
| "loss": 0.0, |
| "step": 1164000 |
| }, |
| { |
| "epoch": 1.8589571162778076, |
| "grad_norm": 0.00019968704145867378, |
| "learning_rate": 3.6691242908054036e-07, |
| "loss": 0.0, |
| "step": 1164500 |
| }, |
| { |
| "epoch": 1.8597552945157971, |
| "grad_norm": 0.00015924213221296668, |
| "learning_rate": 3.6278810636044713e-07, |
| "loss": 0.0, |
| "step": 1165000 |
| }, |
| { |
| "epoch": 1.8605534727537867, |
| "grad_norm": 0.0002347809204366058, |
| "learning_rate": 3.586868110575686e-07, |
| "loss": 0.0, |
| "step": 1165500 |
| }, |
| { |
| "epoch": 1.8613516509917765, |
| "grad_norm": 0.00016319741553161293, |
| "learning_rate": 3.5460854962410526e-07, |
| "loss": 0.0, |
| "step": 1166000 |
| }, |
| { |
| "epoch": 1.862149829229766, |
| "grad_norm": 0.002569986740127206, |
| "learning_rate": 3.505533284760232e-07, |
| "loss": 0.0, |
| "step": 1166500 |
| }, |
| { |
| "epoch": 1.8629480074677556, |
| "grad_norm": 0.00019444114877842367, |
| "learning_rate": 3.465211539930374e-07, |
| "loss": 0.0, |
| "step": 1167000 |
| }, |
| { |
| "epoch": 1.8637461857057454, |
| "grad_norm": 0.00016536629118490964, |
| "learning_rate": 3.4251203251860876e-07, |
| "loss": 0.0, |
| "step": 1167500 |
| }, |
| { |
| "epoch": 1.8645443639437347, |
| "grad_norm": 0.00015523859474342316, |
| "learning_rate": 3.385259703599303e-07, |
| "loss": 0.0, |
| "step": 1168000 |
| }, |
| { |
| "epoch": 1.8653425421817245, |
| "grad_norm": 0.0039033798966556787, |
| "learning_rate": 3.345629737879158e-07, |
| "loss": 0.0, |
| "step": 1168500 |
| }, |
| { |
| "epoch": 1.866140720419714, |
| "grad_norm": 0.0003161656204611063, |
| "learning_rate": 3.306230490371931e-07, |
| "loss": 0.0, |
| "step": 1169000 |
| }, |
| { |
| "epoch": 1.8669388986577036, |
| "grad_norm": 0.0012187734246253967, |
| "learning_rate": 3.267062023060957e-07, |
| "loss": 0.0, |
| "step": 1169500 |
| }, |
| { |
| "epoch": 1.8677370768956933, |
| "grad_norm": 0.00017112820933107287, |
| "learning_rate": 3.228124397566479e-07, |
| "loss": 0.0, |
| "step": 1170000 |
| }, |
| { |
| "epoch": 1.868535255133683, |
| "grad_norm": 0.00031683017732575536, |
| "learning_rate": 3.189417675145578e-07, |
| "loss": 0.0, |
| "step": 1170500 |
| }, |
| { |
| "epoch": 1.8693334333716725, |
| "grad_norm": 0.000180508301127702, |
| "learning_rate": 3.1509419166920797e-07, |
| "loss": 0.0, |
| "step": 1171000 |
| }, |
| { |
| "epoch": 1.8701316116096622, |
| "grad_norm": 1.3118445873260498, |
| "learning_rate": 3.1126971827364627e-07, |
| "loss": 0.0, |
| "step": 1171500 |
| }, |
| { |
| "epoch": 1.8709297898476516, |
| "grad_norm": 0.00019488642283249646, |
| "learning_rate": 3.074683533445749e-07, |
| "loss": 0.0, |
| "step": 1172000 |
| }, |
| { |
| "epoch": 1.8717279680856413, |
| "grad_norm": 0.0001434733421774581, |
| "learning_rate": 3.036901028623401e-07, |
| "loss": 0.0, |
| "step": 1172500 |
| }, |
| { |
| "epoch": 1.872526146323631, |
| "grad_norm": 0.0001922912779264152, |
| "learning_rate": 2.999349727709272e-07, |
| "loss": 0.0, |
| "step": 1173000 |
| }, |
| { |
| "epoch": 1.8733243245616205, |
| "grad_norm": 0.00012995305587537587, |
| "learning_rate": 2.9620296897794553e-07, |
| "loss": 0.0, |
| "step": 1173500 |
| }, |
| { |
| "epoch": 1.8741225027996102, |
| "grad_norm": 0.0002586755435913801, |
| "learning_rate": 2.924940973546236e-07, |
| "loss": 0.0, |
| "step": 1174000 |
| }, |
| { |
| "epoch": 1.8749206810375998, |
| "grad_norm": 0.0007729693315923214, |
| "learning_rate": 2.888083637357991e-07, |
| "loss": 0.0, |
| "step": 1174500 |
| }, |
| { |
| "epoch": 1.8757188592755893, |
| "grad_norm": 0.00014659270527772605, |
| "learning_rate": 2.8514577391990536e-07, |
| "loss": 0.0, |
| "step": 1175000 |
| }, |
| { |
| "epoch": 1.8765170375135791, |
| "grad_norm": 0.00019715832604561, |
| "learning_rate": 2.815063336689683e-07, |
| "loss": 0.0, |
| "step": 1175500 |
| }, |
| { |
| "epoch": 1.8773152157515685, |
| "grad_norm": 0.0001621273549972102, |
| "learning_rate": 2.778900487085945e-07, |
| "loss": 0.0, |
| "step": 1176000 |
| }, |
| { |
| "epoch": 1.8781133939895582, |
| "grad_norm": 0.00016097325715236366, |
| "learning_rate": 2.742969247279614e-07, |
| "loss": 0.0, |
| "step": 1176500 |
| }, |
| { |
| "epoch": 1.878911572227548, |
| "grad_norm": 0.0001756875280989334, |
| "learning_rate": 2.707269673798074e-07, |
| "loss": 0.0, |
| "step": 1177000 |
| }, |
| { |
| "epoch": 1.8797097504655373, |
| "grad_norm": 0.00022921212075743824, |
| "learning_rate": 2.671801822804315e-07, |
| "loss": 0.0, |
| "step": 1177500 |
| }, |
| { |
| "epoch": 1.8805079287035271, |
| "grad_norm": 0.0003720010572578758, |
| "learning_rate": 2.63656575009672e-07, |
| "loss": 0.0, |
| "step": 1178000 |
| }, |
| { |
| "epoch": 1.8813061069415167, |
| "grad_norm": 0.00016360900190193206, |
| "learning_rate": 2.6015615111090465e-07, |
| "loss": 0.0, |
| "step": 1178500 |
| }, |
| { |
| "epoch": 1.8821042851795062, |
| "grad_norm": 0.00013911504356656224, |
| "learning_rate": 2.566789160910343e-07, |
| "loss": 0.0, |
| "step": 1179000 |
| }, |
| { |
| "epoch": 1.882902463417496, |
| "grad_norm": 0.00016801197489257902, |
| "learning_rate": 2.532248754204819e-07, |
| "loss": 0.0, |
| "step": 1179500 |
| }, |
| { |
| "epoch": 1.8837006416554856, |
| "grad_norm": 0.00014275613648351282, |
| "learning_rate": 2.497940345331856e-07, |
| "loss": 0.0, |
| "step": 1180000 |
| }, |
| { |
| "epoch": 1.8844988198934751, |
| "grad_norm": 0.00018284647376276553, |
| "learning_rate": 2.4638639882657634e-07, |
| "loss": 0.0, |
| "step": 1180500 |
| }, |
| { |
| "epoch": 1.885296998131465, |
| "grad_norm": 0.0001408082462148741, |
| "learning_rate": 2.430019736615824e-07, |
| "loss": 0.0, |
| "step": 1181000 |
| }, |
| { |
| "epoch": 1.8860951763694542, |
| "grad_norm": 0.00038785370998084545, |
| "learning_rate": 2.396407643626214e-07, |
| "loss": 0.0, |
| "step": 1181500 |
| }, |
| { |
| "epoch": 1.886893354607444, |
| "grad_norm": 0.000422690442064777, |
| "learning_rate": 2.3630277621758178e-07, |
| "loss": 0.0, |
| "step": 1182000 |
| }, |
| { |
| "epoch": 1.8876915328454336, |
| "grad_norm": 0.00020551889610942453, |
| "learning_rate": 2.3298801447782126e-07, |
| "loss": 0.0, |
| "step": 1182500 |
| }, |
| { |
| "epoch": 1.8884897110834231, |
| "grad_norm": 0.00021077659039292485, |
| "learning_rate": 2.2969648435815671e-07, |
| "loss": 0.0, |
| "step": 1183000 |
| }, |
| { |
| "epoch": 1.889287889321413, |
| "grad_norm": 0.00023901699751149863, |
| "learning_rate": 2.2642819103686263e-07, |
| "loss": 0.0, |
| "step": 1183500 |
| }, |
| { |
| "epoch": 1.8900860675594024, |
| "grad_norm": 0.00018016165995504707, |
| "learning_rate": 2.2318313965564773e-07, |
| "loss": 0.0, |
| "step": 1184000 |
| }, |
| { |
| "epoch": 1.890884245797392, |
| "grad_norm": 0.00011153416562592611, |
| "learning_rate": 2.1996133531966668e-07, |
| "loss": 0.0, |
| "step": 1184500 |
| }, |
| { |
| "epoch": 1.8916824240353818, |
| "grad_norm": 0.000145767378853634, |
| "learning_rate": 2.1676278309748997e-07, |
| "loss": 0.0, |
| "step": 1185000 |
| }, |
| { |
| "epoch": 1.8924806022733711, |
| "grad_norm": 0.00012787348532583565, |
| "learning_rate": 2.1358748802111917e-07, |
| "loss": 0.0, |
| "step": 1185500 |
| }, |
| { |
| "epoch": 1.893278780511361, |
| "grad_norm": 0.0002693917485885322, |
| "learning_rate": 2.1043545508596162e-07, |
| "loss": 0.0, |
| "step": 1186000 |
| }, |
| { |
| "epoch": 1.8940769587493504, |
| "grad_norm": 0.00017825972463469952, |
| "learning_rate": 2.0730668925082575e-07, |
| "loss": 0.0, |
| "step": 1186500 |
| }, |
| { |
| "epoch": 1.89487513698734, |
| "grad_norm": 0.0002469649480190128, |
| "learning_rate": 2.0420119543792248e-07, |
| "loss": 0.0, |
| "step": 1187000 |
| }, |
| { |
| "epoch": 1.8956733152253298, |
| "grad_norm": 0.027682464569807053, |
| "learning_rate": 2.0111897853284544e-07, |
| "loss": 0.0, |
| "step": 1187500 |
| }, |
| { |
| "epoch": 1.8964714934633193, |
| "grad_norm": 0.0001725145266391337, |
| "learning_rate": 1.980600433845725e-07, |
| "loss": 0.0, |
| "step": 1188000 |
| }, |
| { |
| "epoch": 1.897269671701309, |
| "grad_norm": 0.00032132607884705067, |
| "learning_rate": 1.9502439480545087e-07, |
| "loss": 0.0, |
| "step": 1188500 |
| }, |
| { |
| "epoch": 1.8980678499392987, |
| "grad_norm": 0.00022941759380046278, |
| "learning_rate": 1.9201203757120034e-07, |
| "loss": 0.0, |
| "step": 1189000 |
| }, |
| { |
| "epoch": 1.898866028177288, |
| "grad_norm": 0.00014870801533106714, |
| "learning_rate": 1.8902297642088673e-07, |
| "loss": 0.0, |
| "step": 1189500 |
| }, |
| { |
| "epoch": 1.8996642064152778, |
| "grad_norm": 0.0001499906793469563, |
| "learning_rate": 1.8605721605693515e-07, |
| "loss": 0.0, |
| "step": 1190000 |
| }, |
| { |
| "epoch": 1.9004623846532673, |
| "grad_norm": 0.00012592818529810756, |
| "learning_rate": 1.8311476114511505e-07, |
| "loss": 0.0, |
| "step": 1190500 |
| }, |
| { |
| "epoch": 1.9012605628912569, |
| "grad_norm": 0.00014124861627351493, |
| "learning_rate": 1.8019561631452185e-07, |
| "loss": 0.0, |
| "step": 1191000 |
| }, |
| { |
| "epoch": 1.9020587411292467, |
| "grad_norm": 0.0001274181850021705, |
| "learning_rate": 1.7729978615758868e-07, |
| "loss": 0.0, |
| "step": 1191500 |
| }, |
| { |
| "epoch": 1.9028569193672362, |
| "grad_norm": 0.0002295897575095296, |
| "learning_rate": 1.744272752300663e-07, |
| "loss": 0.0, |
| "step": 1192000 |
| }, |
| { |
| "epoch": 1.9036550976052258, |
| "grad_norm": 0.00015409973275382072, |
| "learning_rate": 1.7157808805101648e-07, |
| "loss": 0.0, |
| "step": 1192500 |
| }, |
| { |
| "epoch": 1.9044532758432156, |
| "grad_norm": 0.0002241683832835406, |
| "learning_rate": 1.687522291028154e-07, |
| "loss": 0.0, |
| "step": 1193000 |
| }, |
| { |
| "epoch": 1.905251454081205, |
| "grad_norm": 0.0004630073963198811, |
| "learning_rate": 1.659497028311302e-07, |
| "loss": 0.0, |
| "step": 1193500 |
| }, |
| { |
| "epoch": 1.9060496323191947, |
| "grad_norm": 0.00024872427457012236, |
| "learning_rate": 1.631705136449274e-07, |
| "loss": 0.0, |
| "step": 1194000 |
| }, |
| { |
| "epoch": 1.9068478105571844, |
| "grad_norm": 0.0001354358100797981, |
| "learning_rate": 1.6041466591645627e-07, |
| "loss": 0.0, |
| "step": 1194500 |
| }, |
| { |
| "epoch": 1.9076459887951738, |
| "grad_norm": 0.00018827700114343315, |
| "learning_rate": 1.5768216398124535e-07, |
| "loss": 0.0, |
| "step": 1195000 |
| }, |
| { |
| "epoch": 1.9084441670331636, |
| "grad_norm": 0.0002568909549154341, |
| "learning_rate": 1.54973012138096e-07, |
| "loss": 0.0, |
| "step": 1195500 |
| }, |
| { |
| "epoch": 1.909242345271153, |
| "grad_norm": 0.00014703450142405927, |
| "learning_rate": 1.522872146490739e-07, |
| "loss": 0.0, |
| "step": 1196000 |
| }, |
| { |
| "epoch": 1.9100405235091427, |
| "grad_norm": 0.0002241420588688925, |
| "learning_rate": 1.496247757395075e-07, |
| "loss": 0.0, |
| "step": 1196500 |
| }, |
| { |
| "epoch": 1.9108387017471324, |
| "grad_norm": 0.00019378996512386948, |
| "learning_rate": 1.469856995979696e-07, |
| "loss": 0.0, |
| "step": 1197000 |
| }, |
| { |
| "epoch": 1.911636879985122, |
| "grad_norm": 0.0004964235122315586, |
| "learning_rate": 1.4436999037628407e-07, |
| "loss": 0.0, |
| "step": 1197500 |
| }, |
| { |
| "epoch": 1.9124350582231116, |
| "grad_norm": 0.0001157882870757021, |
| "learning_rate": 1.4177765218951422e-07, |
| "loss": 0.0, |
| "step": 1198000 |
| }, |
| { |
| "epoch": 1.9132332364611013, |
| "grad_norm": 0.0001476890465710312, |
| "learning_rate": 1.3920868911595275e-07, |
| "loss": 0.0, |
| "step": 1198500 |
| }, |
| { |
| "epoch": 1.9140314146990907, |
| "grad_norm": 0.0002459689858369529, |
| "learning_rate": 1.3666310519711843e-07, |
| "loss": 0.0, |
| "step": 1199000 |
| }, |
| { |
| "epoch": 1.9148295929370804, |
| "grad_norm": 0.0001660026318859309, |
| "learning_rate": 1.3414090443775285e-07, |
| "loss": 0.0, |
| "step": 1199500 |
| }, |
| { |
| "epoch": 1.91562777117507, |
| "grad_norm": 0.0002628338697832078, |
| "learning_rate": 1.3164209080581025e-07, |
| "loss": 0.0, |
| "step": 1200000 |
| }, |
| { |
| "epoch": 1.91562777117507, |
| "eval_loss": 1.6006262740120292e-05, |
| "eval_runtime": 22149.8885, |
| "eval_samples_per_second": 100.556, |
| "eval_steps_per_second": 3.142, |
| "step": 1200000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 1252852, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 150000, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 3 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.055996293644091e+19, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|