{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1779,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0016863406408094434,
      "grad_norm": 56.98304098027541,
      "learning_rate": 2.8089887640449437e-07,
      "loss": 10.9859,
      "step": 1
    },
    {
      "epoch": 0.003372681281618887,
      "grad_norm": 55.57657460005535,
      "learning_rate": 5.617977528089887e-07,
      "loss": 11.1229,
      "step": 2
    },
    {
      "epoch": 0.00505902192242833,
      "grad_norm": 56.33594802581801,
      "learning_rate": 8.426966292134832e-07,
      "loss": 11.0837,
      "step": 3
    },
    {
      "epoch": 0.006745362563237774,
      "grad_norm": 56.31959427274796,
      "learning_rate": 1.1235955056179775e-06,
      "loss": 11.1692,
      "step": 4
    },
    {
      "epoch": 0.008431703204047217,
      "grad_norm": 53.79759651259812,
      "learning_rate": 1.404494382022472e-06,
      "loss": 11.2125,
      "step": 5
    },
    {
      "epoch": 0.01011804384485666,
      "grad_norm": 57.26270277410124,
      "learning_rate": 1.6853932584269663e-06,
      "loss": 11.042,
      "step": 6
    },
    {
      "epoch": 0.011804384485666104,
      "grad_norm": 61.96582900530684,
      "learning_rate": 1.966292134831461e-06,
      "loss": 10.7828,
      "step": 7
    },
    {
      "epoch": 0.013490725126475547,
      "grad_norm": 62.57155538626125,
      "learning_rate": 2.247191011235955e-06,
      "loss": 10.6918,
      "step": 8
    },
    {
      "epoch": 0.01517706576728499,
      "grad_norm": 82.29395324590898,
      "learning_rate": 2.5280898876404495e-06,
      "loss": 9.7507,
      "step": 9
    },
    {
      "epoch": 0.016863406408094434,
      "grad_norm": 90.9461623535896,
      "learning_rate": 2.808988764044944e-06,
      "loss": 9.4252,
      "step": 10
    },
    {
      "epoch": 0.01854974704890388,
      "grad_norm": 99.14771668814727,
      "learning_rate": 3.089887640449438e-06,
      "loss": 9.0263,
      "step": 11
    },
    {
      "epoch": 0.02023608768971332,
      "grad_norm": 95.60708207776585,
      "learning_rate": 3.3707865168539327e-06,
      "loss": 4.655,
      "step": 12
    },
    {
      "epoch": 0.021922428330522766,
      "grad_norm": 62.44385087566126,
      "learning_rate": 3.651685393258427e-06,
      "loss": 3.5398,
      "step": 13
    },
    {
      "epoch": 0.023608768971332208,
      "grad_norm": 52.52199287821377,
      "learning_rate": 3.932584269662922e-06,
      "loss": 3.1373,
      "step": 14
    },
    {
      "epoch": 0.025295109612141653,
      "grad_norm": 33.78765944113671,
      "learning_rate": 4.213483146067416e-06,
      "loss": 2.3934,
      "step": 15
    },
    {
      "epoch": 0.026981450252951095,
      "grad_norm": 22.3441016196329,
      "learning_rate": 4.49438202247191e-06,
      "loss": 2.0155,
      "step": 16
    },
    {
      "epoch": 0.02866779089376054,
      "grad_norm": 6.116667827314463,
      "learning_rate": 4.7752808988764044e-06,
      "loss": 1.3431,
      "step": 17
    },
    {
      "epoch": 0.03035413153456998,
      "grad_norm": 5.066417296011239,
      "learning_rate": 5.056179775280899e-06,
      "loss": 1.3379,
      "step": 18
    },
    {
      "epoch": 0.03204047217537943,
      "grad_norm": 4.09740440774978,
      "learning_rate": 5.3370786516853935e-06,
      "loss": 1.2552,
      "step": 19
    },
    {
      "epoch": 0.03372681281618887,
      "grad_norm": 2.8692740755186263,
      "learning_rate": 5.617977528089888e-06,
      "loss": 1.1335,
      "step": 20
    },
    {
      "epoch": 0.03541315345699832,
      "grad_norm": 2.3265505323675137,
      "learning_rate": 5.8988764044943826e-06,
      "loss": 1.1324,
      "step": 21
    },
    {
      "epoch": 0.03709949409780776,
      "grad_norm": 1.798729034462734,
      "learning_rate": 6.179775280898876e-06,
      "loss": 1.0814,
      "step": 22
    },
    {
      "epoch": 0.0387858347386172,
      "grad_norm": 51.17680122626074,
      "learning_rate": 6.460674157303372e-06,
      "loss": 0.9465,
      "step": 23
    },
    {
      "epoch": 0.04047217537942664,
      "grad_norm": 9.519207965054298,
      "learning_rate": 6.741573033707865e-06,
      "loss": 0.9286,
      "step": 24
    },
    {
      "epoch": 0.04215851602023609,
      "grad_norm": 1.784859029228961,
      "learning_rate": 7.022471910112361e-06,
      "loss": 0.9436,
      "step": 25
    },
    {
      "epoch": 0.04384485666104553,
      "grad_norm": 1.183273056167306,
      "learning_rate": 7.303370786516854e-06,
      "loss": 0.8943,
      "step": 26
    },
    {
      "epoch": 0.045531197301854974,
      "grad_norm": 1.0093506180093674,
      "learning_rate": 7.584269662921349e-06,
      "loss": 0.8803,
      "step": 27
    },
    {
      "epoch": 0.047217537942664416,
      "grad_norm": 0.9102179680507179,
      "learning_rate": 7.865168539325843e-06,
      "loss": 0.8366,
      "step": 28
    },
    {
      "epoch": 0.048903878583473864,
      "grad_norm": 0.83692942638752,
      "learning_rate": 8.146067415730338e-06,
      "loss": 0.8032,
      "step": 29
    },
    {
      "epoch": 0.050590219224283306,
      "grad_norm": 0.7948758696576288,
      "learning_rate": 8.426966292134832e-06,
      "loss": 0.8193,
      "step": 30
    },
    {
      "epoch": 0.05227655986509275,
      "grad_norm": 0.9560450801368573,
      "learning_rate": 8.707865168539327e-06,
      "loss": 0.7804,
      "step": 31
    },
    {
      "epoch": 0.05396290050590219,
      "grad_norm": 0.653701463750391,
      "learning_rate": 8.98876404494382e-06,
      "loss": 0.7531,
      "step": 32
    },
    {
      "epoch": 0.05564924114671164,
      "grad_norm": 0.70122814315399,
      "learning_rate": 9.269662921348316e-06,
      "loss": 0.7444,
      "step": 33
    },
    {
      "epoch": 0.05733558178752108,
      "grad_norm": 0.7091859317644309,
      "learning_rate": 9.550561797752809e-06,
      "loss": 0.7139,
      "step": 34
    },
    {
      "epoch": 0.05902192242833052,
      "grad_norm": 0.673989006196913,
      "learning_rate": 9.831460674157303e-06,
      "loss": 0.6992,
      "step": 35
    },
    {
      "epoch": 0.06070826306913996,
      "grad_norm": 0.6011374276083163,
      "learning_rate": 1.0112359550561798e-05,
      "loss": 0.7038,
      "step": 36
    },
    {
      "epoch": 0.06239460370994941,
      "grad_norm": 0.6205180106169818,
      "learning_rate": 1.0393258426966292e-05,
      "loss": 0.6854,
      "step": 37
    },
    {
      "epoch": 0.06408094435075885,
      "grad_norm": 0.5323665662116028,
      "learning_rate": 1.0674157303370787e-05,
      "loss": 0.6865,
      "step": 38
    },
    {
      "epoch": 0.0657672849915683,
      "grad_norm": 0.5211714451245653,
      "learning_rate": 1.0955056179775282e-05,
      "loss": 0.6914,
      "step": 39
    },
    {
      "epoch": 0.06745362563237774,
      "grad_norm": 0.6271140400004065,
      "learning_rate": 1.1235955056179776e-05,
      "loss": 0.6996,
      "step": 40
    },
    {
      "epoch": 0.06913996627318718,
      "grad_norm": 0.5358305966066623,
      "learning_rate": 1.151685393258427e-05,
      "loss": 0.6421,
      "step": 41
    },
    {
      "epoch": 0.07082630691399663,
      "grad_norm": 0.4160609364118775,
      "learning_rate": 1.1797752808988765e-05,
      "loss": 0.6366,
      "step": 42
    },
    {
      "epoch": 0.07251264755480608,
      "grad_norm": 0.5973714197954098,
      "learning_rate": 1.207865168539326e-05,
      "loss": 0.673,
      "step": 43
    },
    {
      "epoch": 0.07419898819561552,
      "grad_norm": 0.49953757804840393,
      "learning_rate": 1.2359550561797752e-05,
      "loss": 0.6259,
      "step": 44
    },
    {
      "epoch": 0.07588532883642496,
      "grad_norm": 0.47704181367251264,
      "learning_rate": 1.2640449438202249e-05,
      "loss": 0.6219,
      "step": 45
    },
    {
      "epoch": 0.0775716694772344,
      "grad_norm": 0.372415612114133,
      "learning_rate": 1.2921348314606743e-05,
      "loss": 0.6202,
      "step": 46
    },
    {
      "epoch": 0.07925801011804384,
      "grad_norm": 0.3538579138166064,
      "learning_rate": 1.3202247191011236e-05,
      "loss": 0.6284,
      "step": 47
    },
    {
      "epoch": 0.08094435075885328,
      "grad_norm": 0.4529401926284446,
      "learning_rate": 1.348314606741573e-05,
      "loss": 0.621,
      "step": 48
    },
    {
      "epoch": 0.08263069139966273,
      "grad_norm": 0.6008956943318995,
      "learning_rate": 1.3764044943820225e-05,
      "loss": 0.6222,
      "step": 49
    },
    {
      "epoch": 0.08431703204047218,
      "grad_norm": 0.40843012641704357,
      "learning_rate": 1.4044943820224721e-05,
      "loss": 0.6483,
      "step": 50
    },
    {
      "epoch": 0.08600337268128162,
      "grad_norm": 0.33729728212338617,
      "learning_rate": 1.4325842696629212e-05,
      "loss": 0.6233,
      "step": 51
    },
    {
      "epoch": 0.08768971332209106,
      "grad_norm": 0.3370599653432775,
      "learning_rate": 1.4606741573033709e-05,
      "loss": 0.6106,
      "step": 52
    },
    {
      "epoch": 0.0893760539629005,
      "grad_norm": 0.3191164676272232,
      "learning_rate": 1.4887640449438203e-05,
      "loss": 0.5751,
      "step": 53
    },
    {
      "epoch": 0.09106239460370995,
      "grad_norm": 0.36054754880278495,
      "learning_rate": 1.5168539325842698e-05,
      "loss": 0.6046,
      "step": 54
    },
    {
      "epoch": 0.09274873524451939,
      "grad_norm": 0.30445133959986886,
      "learning_rate": 1.544943820224719e-05,
      "loss": 0.5989,
      "step": 55
    },
    {
      "epoch": 0.09443507588532883,
      "grad_norm": 0.3035359738566352,
      "learning_rate": 1.5730337078651687e-05,
      "loss": 0.5789,
      "step": 56
    },
    {
      "epoch": 0.09612141652613827,
      "grad_norm": 0.35195109228569355,
      "learning_rate": 1.601123595505618e-05,
      "loss": 0.5895,
      "step": 57
    },
    {
      "epoch": 0.09780775716694773,
      "grad_norm": 0.31940901935141724,
      "learning_rate": 1.6292134831460676e-05,
      "loss": 0.5936,
      "step": 58
    },
    {
      "epoch": 0.09949409780775717,
      "grad_norm": 0.30162419503220067,
      "learning_rate": 1.657303370786517e-05,
      "loss": 0.6055,
      "step": 59
    },
    {
      "epoch": 0.10118043844856661,
      "grad_norm": 0.2963597866280323,
      "learning_rate": 1.6853932584269665e-05,
      "loss": 0.605,
      "step": 60
    },
    {
      "epoch": 0.10286677908937605,
      "grad_norm": 0.30817805390280884,
      "learning_rate": 1.7134831460674158e-05,
      "loss": 0.5693,
      "step": 61
    },
    {
      "epoch": 0.1045531197301855,
      "grad_norm": 0.2911807010165559,
      "learning_rate": 1.7415730337078654e-05,
      "loss": 0.5651,
      "step": 62
    },
    {
      "epoch": 0.10623946037099494,
      "grad_norm": 0.27943368825325704,
      "learning_rate": 1.7696629213483147e-05,
      "loss": 0.5748,
      "step": 63
    },
    {
      "epoch": 0.10792580101180438,
      "grad_norm": 0.29822593015129506,
      "learning_rate": 1.797752808988764e-05,
      "loss": 0.596,
      "step": 64
    },
    {
      "epoch": 0.10961214165261383,
      "grad_norm": 0.27141944390865275,
      "learning_rate": 1.8258426966292136e-05,
      "loss": 0.533,
      "step": 65
    },
    {
      "epoch": 0.11129848229342328,
      "grad_norm": 0.2862392624126836,
      "learning_rate": 1.8539325842696632e-05,
      "loss": 0.5566,
      "step": 66
    },
    {
      "epoch": 0.11298482293423272,
      "grad_norm": 0.3180895996678809,
      "learning_rate": 1.8820224719101125e-05,
      "loss": 0.5508,
      "step": 67
    },
    {
      "epoch": 0.11467116357504216,
      "grad_norm": 0.33616423964611203,
      "learning_rate": 1.9101123595505618e-05,
      "loss": 0.5564,
      "step": 68
    },
    {
      "epoch": 0.1163575042158516,
      "grad_norm": 0.25782088441170614,
      "learning_rate": 1.9382022471910114e-05,
      "loss": 0.5538,
      "step": 69
    },
    {
      "epoch": 0.11804384485666104,
      "grad_norm": 0.3443028051586477,
      "learning_rate": 1.9662921348314607e-05,
      "loss": 0.5599,
      "step": 70
    },
    {
      "epoch": 0.11973018549747048,
      "grad_norm": 0.3663521866601868,
      "learning_rate": 1.99438202247191e-05,
      "loss": 0.5958,
      "step": 71
    },
    {
      "epoch": 0.12141652613827993,
      "grad_norm": 0.2430774348391263,
      "learning_rate": 2.0224719101123596e-05,
      "loss": 0.5405,
      "step": 72
    },
    {
      "epoch": 0.12310286677908938,
      "grad_norm": 0.3645167427473072,
      "learning_rate": 2.0505617977528092e-05,
      "loss": 0.5357,
      "step": 73
    },
    {
      "epoch": 0.12478920741989882,
      "grad_norm": 0.5495193914586647,
      "learning_rate": 2.0786516853932585e-05,
      "loss": 0.5809,
      "step": 74
    },
    {
      "epoch": 0.12647554806070826,
      "grad_norm": 0.2886314502296464,
      "learning_rate": 2.1067415730337078e-05,
      "loss": 0.5434,
      "step": 75
    },
    {
      "epoch": 0.1281618887015177,
      "grad_norm": 0.40721543977266106,
      "learning_rate": 2.1348314606741574e-05,
      "loss": 0.5578,
      "step": 76
    },
    {
      "epoch": 0.12984822934232715,
      "grad_norm": 0.3893677045739628,
      "learning_rate": 2.1629213483146067e-05,
      "loss": 0.5632,
      "step": 77
    },
    {
      "epoch": 0.1315345699831366,
      "grad_norm": 0.3009475566119484,
      "learning_rate": 2.1910112359550563e-05,
      "loss": 0.5509,
      "step": 78
    },
    {
      "epoch": 0.13322091062394603,
      "grad_norm": 0.3919989486891414,
      "learning_rate": 2.2191011235955056e-05,
      "loss": 0.5489,
      "step": 79
    },
    {
      "epoch": 0.13490725126475547,
      "grad_norm": 0.3420814355138489,
      "learning_rate": 2.2471910112359552e-05,
      "loss": 0.5556,
      "step": 80
    },
    {
      "epoch": 0.13659359190556492,
      "grad_norm": 0.3491052919845026,
      "learning_rate": 2.2752808988764045e-05,
      "loss": 0.5549,
      "step": 81
    },
    {
      "epoch": 0.13827993254637436,
      "grad_norm": 0.35387991760875304,
      "learning_rate": 2.303370786516854e-05,
      "loss": 0.5486,
      "step": 82
    },
    {
      "epoch": 0.1399662731871838,
      "grad_norm": 0.3864441022271678,
      "learning_rate": 2.3314606741573034e-05,
      "loss": 0.5384,
      "step": 83
    },
    {
      "epoch": 0.14165261382799327,
      "grad_norm": 0.3427958684700061,
      "learning_rate": 2.359550561797753e-05,
      "loss": 0.5399,
      "step": 84
    },
    {
      "epoch": 0.1433389544688027,
      "grad_norm": 0.3452334549252852,
      "learning_rate": 2.3876404494382023e-05,
      "loss": 0.5378,
      "step": 85
    },
    {
      "epoch": 0.14502529510961215,
      "grad_norm": 0.42336780350074216,
      "learning_rate": 2.415730337078652e-05,
      "loss": 0.5416,
      "step": 86
    },
    {
      "epoch": 0.1467116357504216,
      "grad_norm": 0.341238202299115,
      "learning_rate": 2.4438202247191012e-05,
      "loss": 0.5587,
      "step": 87
    },
    {
      "epoch": 0.14839797639123103,
      "grad_norm": 0.357192951048682,
      "learning_rate": 2.4719101123595505e-05,
      "loss": 0.5153,
      "step": 88
    },
    {
      "epoch": 0.15008431703204048,
      "grad_norm": 0.429528094646145,
      "learning_rate": 2.5e-05,
      "loss": 0.5427,
      "step": 89
    },
    {
      "epoch": 0.15177065767284992,
      "grad_norm": 0.33591123944757306,
      "learning_rate": 2.5280898876404497e-05,
      "loss": 0.5435,
      "step": 90
    },
    {
      "epoch": 0.15345699831365936,
      "grad_norm": 0.430601786111773,
      "learning_rate": 2.556179775280899e-05,
      "loss": 0.5322,
      "step": 91
    },
    {
      "epoch": 0.1551433389544688,
      "grad_norm": 0.3358306851062295,
      "learning_rate": 2.5842696629213486e-05,
      "loss": 0.5364,
      "step": 92
    },
    {
      "epoch": 0.15682967959527824,
      "grad_norm": 0.3870041027885968,
      "learning_rate": 2.6123595505617983e-05,
      "loss": 0.5162,
      "step": 93
    },
    {
      "epoch": 0.15851602023608768,
      "grad_norm": 0.3310317022988692,
      "learning_rate": 2.6404494382022472e-05,
      "loss": 0.5343,
      "step": 94
    },
    {
      "epoch": 0.16020236087689713,
      "grad_norm": 0.31435605043103565,
      "learning_rate": 2.6685393258426965e-05,
      "loss": 0.5458,
      "step": 95
    },
    {
      "epoch": 0.16188870151770657,
      "grad_norm": 0.31895462371866395,
      "learning_rate": 2.696629213483146e-05,
      "loss": 0.5387,
      "step": 96
    },
    {
      "epoch": 0.163575042158516,
      "grad_norm": 0.33243010294837577,
      "learning_rate": 2.7247191011235957e-05,
      "loss": 0.5153,
      "step": 97
    },
    {
      "epoch": 0.16526138279932545,
      "grad_norm": 0.32802364042835597,
      "learning_rate": 2.752808988764045e-05,
      "loss": 0.5429,
      "step": 98
    },
    {
      "epoch": 0.16694772344013492,
      "grad_norm": 0.3413628299001048,
      "learning_rate": 2.7808988764044946e-05,
      "loss": 0.5398,
      "step": 99
    },
    {
      "epoch": 0.16863406408094436,
      "grad_norm": 0.3403836902658499,
      "learning_rate": 2.8089887640449443e-05,
      "loss": 0.5197,
      "step": 100
    },
    {
      "epoch": 0.1703204047217538,
      "grad_norm": 0.33690189921055563,
      "learning_rate": 2.8370786516853936e-05,
      "loss": 0.5266,
      "step": 101
    },
    {
      "epoch": 0.17200674536256325,
      "grad_norm": 0.34265834509255605,
      "learning_rate": 2.8651685393258425e-05,
      "loss": 0.5123,
      "step": 102
    },
    {
      "epoch": 0.1736930860033727,
      "grad_norm": 0.30290512367105066,
      "learning_rate": 2.893258426966292e-05,
      "loss": 0.512,
      "step": 103
    },
    {
      "epoch": 0.17537942664418213,
      "grad_norm": 0.3152679845470168,
      "learning_rate": 2.9213483146067417e-05,
      "loss": 0.5315,
      "step": 104
    },
    {
      "epoch": 0.17706576728499157,
      "grad_norm": 0.36153831566772826,
      "learning_rate": 2.949438202247191e-05,
      "loss": 0.5195,
      "step": 105
    },
    {
      "epoch": 0.178752107925801,
      "grad_norm": 0.3201065064193262,
      "learning_rate": 2.9775280898876406e-05,
      "loss": 0.5415,
      "step": 106
    },
    {
      "epoch": 0.18043844856661045,
      "grad_norm": 0.35736910108528097,
      "learning_rate": 3.0056179775280903e-05,
      "loss": 0.5083,
      "step": 107
    },
    {
      "epoch": 0.1821247892074199,
      "grad_norm": 0.3692448119416969,
      "learning_rate": 3.0337078651685396e-05,
      "loss": 0.5213,
      "step": 108
    },
    {
      "epoch": 0.18381112984822934,
      "grad_norm": 0.3284827088565517,
      "learning_rate": 3.061797752808989e-05,
      "loss": 0.512,
      "step": 109
    },
    {
      "epoch": 0.18549747048903878,
      "grad_norm": 0.34971818895710677,
      "learning_rate": 3.089887640449438e-05,
      "loss": 0.5216,
      "step": 110
    },
    {
      "epoch": 0.18718381112984822,
      "grad_norm": 0.359706671456223,
      "learning_rate": 3.1179775280898874e-05,
      "loss": 0.5097,
      "step": 111
    },
    {
      "epoch": 0.18887015177065766,
      "grad_norm": 0.31627697174210256,
      "learning_rate": 3.1460674157303374e-05,
      "loss": 0.5163,
      "step": 112
    },
    {
      "epoch": 0.1905564924114671,
      "grad_norm": 0.29216175832212843,
      "learning_rate": 3.1741573033707866e-05,
      "loss": 0.5391,
      "step": 113
    },
    {
      "epoch": 0.19224283305227655,
      "grad_norm": 0.32764391376282487,
      "learning_rate": 3.202247191011236e-05,
      "loss": 0.5012,
      "step": 114
    },
    {
      "epoch": 0.19392917369308602,
      "grad_norm": 0.42826805653790595,
      "learning_rate": 3.230337078651686e-05,
      "loss": 0.5168,
      "step": 115
    },
    {
      "epoch": 0.19561551433389546,
      "grad_norm": 0.3433775054706283,
      "learning_rate": 3.258426966292135e-05,
      "loss": 0.5172,
      "step": 116
    },
    {
      "epoch": 0.1973018549747049,
      "grad_norm": 0.38194080024414423,
      "learning_rate": 3.2865168539325845e-05,
      "loss": 0.5346,
      "step": 117
    },
    {
      "epoch": 0.19898819561551434,
      "grad_norm": 0.4384292462018469,
      "learning_rate": 3.314606741573034e-05,
      "loss": 0.5063,
      "step": 118
    },
    {
      "epoch": 0.20067453625632378,
      "grad_norm": 0.4221030612516262,
      "learning_rate": 3.342696629213483e-05,
      "loss": 0.4992,
      "step": 119
    },
    {
      "epoch": 0.20236087689713322,
      "grad_norm": 0.5169282239706593,
      "learning_rate": 3.370786516853933e-05,
      "loss": 0.5027,
      "step": 120
    },
    {
      "epoch": 0.20404721753794267,
      "grad_norm": 0.44828094060244733,
      "learning_rate": 3.398876404494382e-05,
      "loss": 0.5181,
      "step": 121
    },
    {
      "epoch": 0.2057335581787521,
      "grad_norm": 0.4110052533087039,
      "learning_rate": 3.4269662921348316e-05,
      "loss": 0.5261,
      "step": 122
    },
    {
      "epoch": 0.20741989881956155,
      "grad_norm": 0.3782790860010657,
      "learning_rate": 3.455056179775281e-05,
      "loss": 0.4979,
      "step": 123
    },
    {
      "epoch": 0.209106239460371,
      "grad_norm": 0.35048604392267096,
      "learning_rate": 3.483146067415731e-05,
      "loss": 0.5252,
      "step": 124
    },
    {
      "epoch": 0.21079258010118043,
      "grad_norm": 0.3668221403195122,
      "learning_rate": 3.51123595505618e-05,
      "loss": 0.5149,
      "step": 125
    },
    {
      "epoch": 0.21247892074198987,
      "grad_norm": 0.40260454650546834,
      "learning_rate": 3.5393258426966294e-05,
      "loss": 0.5106,
      "step": 126
    },
    {
      "epoch": 0.21416526138279932,
      "grad_norm": 0.46604542272081867,
      "learning_rate": 3.5674157303370787e-05,
      "loss": 0.5155,
      "step": 127
    },
    {
      "epoch": 0.21585160202360876,
      "grad_norm": 0.3899275125570308,
      "learning_rate": 3.595505617977528e-05,
      "loss": 0.5012,
      "step": 128
    },
    {
      "epoch": 0.2175379426644182,
      "grad_norm": 0.49335563725473797,
      "learning_rate": 3.623595505617978e-05,
      "loss": 0.5228,
      "step": 129
    },
    {
      "epoch": 0.21922428330522767,
      "grad_norm": 0.3269454707802011,
      "learning_rate": 3.651685393258427e-05,
      "loss": 0.5024,
      "step": 130
    },
    {
      "epoch": 0.2209106239460371,
      "grad_norm": 0.4867838720776066,
      "learning_rate": 3.6797752808988765e-05,
      "loss": 0.4735,
      "step": 131
    },
    {
      "epoch": 0.22259696458684655,
      "grad_norm": 0.4634246367958874,
      "learning_rate": 3.7078651685393264e-05,
      "loss": 0.4915,
      "step": 132
    },
    {
      "epoch": 0.224283305227656,
      "grad_norm": 0.4892729954570698,
      "learning_rate": 3.735955056179776e-05,
      "loss": 0.5027,
      "step": 133
    },
    {
      "epoch": 0.22596964586846544,
      "grad_norm": 0.5748514707897312,
      "learning_rate": 3.764044943820225e-05,
      "loss": 0.4915,
      "step": 134
    },
    {
      "epoch": 0.22765598650927488,
      "grad_norm": 0.479626533442717,
      "learning_rate": 3.792134831460674e-05,
      "loss": 0.4874,
      "step": 135
    },
    {
      "epoch": 0.22934232715008432,
      "grad_norm": 0.5473804829632216,
      "learning_rate": 3.8202247191011236e-05,
      "loss": 0.5041,
      "step": 136
    },
    {
      "epoch": 0.23102866779089376,
      "grad_norm": 0.49996519487113167,
      "learning_rate": 3.8483146067415735e-05,
      "loss": 0.4808,
      "step": 137
    },
    {
      "epoch": 0.2327150084317032,
      "grad_norm": 0.5420765917435771,
      "learning_rate": 3.876404494382023e-05,
      "loss": 0.5,
      "step": 138
    },
    {
      "epoch": 0.23440134907251264,
      "grad_norm": 0.5229436999215559,
      "learning_rate": 3.904494382022472e-05,
      "loss": 0.4934,
      "step": 139
    },
    {
      "epoch": 0.23608768971332209,
      "grad_norm": 0.5275954099593283,
      "learning_rate": 3.9325842696629214e-05,
      "loss": 0.4989,
      "step": 140
    },
    {
      "epoch": 0.23777403035413153,
      "grad_norm": 0.42305450719450605,
      "learning_rate": 3.960674157303371e-05,
      "loss": 0.4933,
      "step": 141
    },
    {
      "epoch": 0.23946037099494097,
      "grad_norm": 0.40063348620691686,
      "learning_rate": 3.98876404494382e-05,
      "loss": 0.4981,
      "step": 142
    },
    {
      "epoch": 0.2411467116357504,
      "grad_norm": 0.4653164060812678,
      "learning_rate": 4.01685393258427e-05,
      "loss": 0.5054,
      "step": 143
    },
    {
      "epoch": 0.24283305227655985,
      "grad_norm": 0.4718439452539601,
      "learning_rate": 4.044943820224719e-05,
      "loss": 0.5181,
      "step": 144
    },
    {
      "epoch": 0.24451939291736932,
      "grad_norm": 0.45311660232496087,
      "learning_rate": 4.0730337078651685e-05,
      "loss": 0.4944,
      "step": 145
    },
    {
      "epoch": 0.24620573355817876,
      "grad_norm": 0.3421077711921803,
      "learning_rate": 4.1011235955056184e-05,
      "loss": 0.4946,
      "step": 146
    },
    {
      "epoch": 0.2478920741989882,
      "grad_norm": 0.4741973695340924,
      "learning_rate": 4.129213483146068e-05,
      "loss": 0.5048,
      "step": 147
    },
    {
      "epoch": 0.24957841483979765,
      "grad_norm": 0.40653718235510355,
      "learning_rate": 4.157303370786517e-05,
      "loss": 0.4849,
      "step": 148
    },
    {
      "epoch": 0.25126475548060706,
      "grad_norm": 0.5158190551546549,
      "learning_rate": 4.185393258426967e-05,
      "loss": 0.5089,
      "step": 149
    },
    {
      "epoch": 0.25295109612141653,
      "grad_norm": 0.6037801873244963,
      "learning_rate": 4.2134831460674156e-05,
      "loss": 0.4955,
      "step": 150
    },
    {
      "epoch": 0.25463743676222594,
      "grad_norm": 0.5513324822622154,
      "learning_rate": 4.2415730337078655e-05,
      "loss": 0.492,
      "step": 151
    },
    {
      "epoch": 0.2563237774030354,
      "grad_norm": 0.32755920076131817,
      "learning_rate": 4.269662921348315e-05,
      "loss": 0.4823,
      "step": 152
    },
    {
      "epoch": 0.2580101180438449,
      "grad_norm": 0.3987104376121614,
      "learning_rate": 4.297752808988764e-05,
      "loss": 0.4861,
      "step": 153
    },
    {
      "epoch": 0.2596964586846543,
      "grad_norm": 0.43570907360445954,
      "learning_rate": 4.3258426966292134e-05,
      "loss": 0.482,
      "step": 154
    },
    {
      "epoch": 0.26138279932546377,
      "grad_norm": 0.49586172159275893,
      "learning_rate": 4.353932584269663e-05,
      "loss": 0.5087,
      "step": 155
    },
    {
      "epoch": 0.2630691399662732,
      "grad_norm": 0.43200956217260056,
      "learning_rate": 4.3820224719101126e-05,
      "loss": 0.5028,
      "step": 156
    },
    {
      "epoch": 0.26475548060708265,
      "grad_norm": 0.461283086175957,
      "learning_rate": 4.410112359550562e-05,
      "loss": 0.4883,
      "step": 157
    },
    {
      "epoch": 0.26644182124789206,
      "grad_norm": 0.44146587137145016,
      "learning_rate": 4.438202247191011e-05,
      "loss": 0.5036,
      "step": 158
    },
    {
      "epoch": 0.26812816188870153,
      "grad_norm": 0.46992072065252666,
      "learning_rate": 4.4662921348314605e-05,
      "loss": 0.5013,
      "step": 159
    },
    {
      "epoch": 0.26981450252951095,
      "grad_norm": 0.4655499464280253,
      "learning_rate": 4.4943820224719104e-05,
      "loss": 0.484,
      "step": 160
    },
    {
      "epoch": 0.2715008431703204,
      "grad_norm": 0.3716614860490022,
      "learning_rate": 4.52247191011236e-05,
      "loss": 0.479,
      "step": 161
    },
    {
      "epoch": 0.27318718381112983,
      "grad_norm": 0.4854406076007183,
      "learning_rate": 4.550561797752809e-05,
      "loss": 0.4832,
      "step": 162
    },
    {
      "epoch": 0.2748735244519393,
      "grad_norm": 0.518732815595233,
      "learning_rate": 4.578651685393259e-05,
      "loss": 0.5046,
      "step": 163
    },
    {
      "epoch": 0.2765598650927487,
      "grad_norm": 0.6286368875144291,
      "learning_rate": 4.606741573033708e-05,
      "loss": 0.5081,
      "step": 164
    },
    {
      "epoch": 0.2782462057335582,
      "grad_norm": 0.4499305788696525,
      "learning_rate": 4.6348314606741575e-05,
      "loss": 0.4655,
      "step": 165
    },
    {
      "epoch": 0.2799325463743676,
      "grad_norm": 0.5216481086847019,
      "learning_rate": 4.662921348314607e-05,
      "loss": 0.469,
      "step": 166
    },
    {
      "epoch": 0.28161888701517707,
      "grad_norm": 0.4372638183735836,
      "learning_rate": 4.691011235955056e-05,
      "loss": 0.4874,
      "step": 167
    },
    {
      "epoch": 0.28330522765598654,
      "grad_norm": 0.6373461982457821,
      "learning_rate": 4.719101123595506e-05,
      "loss": 0.5105,
      "step": 168
    },
    {
      "epoch": 0.28499156829679595,
      "grad_norm": 0.6523204371028702,
      "learning_rate": 4.747191011235955e-05,
      "loss": 0.5034,
      "step": 169
    },
    {
      "epoch": 0.2866779089376054,
      "grad_norm": 0.5846990743822145,
      "learning_rate": 4.7752808988764046e-05,
      "loss": 0.4764,
      "step": 170
    },
    {
      "epoch": 0.28836424957841483,
      "grad_norm": 0.42113540601536525,
      "learning_rate": 4.803370786516854e-05,
      "loss": 0.478,
      "step": 171
    },
    {
      "epoch": 0.2900505902192243,
      "grad_norm": 0.540824682868501,
      "learning_rate": 4.831460674157304e-05,
      "loss": 0.4703,
      "step": 172
    },
    {
      "epoch": 0.2917369308600337,
      "grad_norm": 0.6232981512866863,
      "learning_rate": 4.859550561797753e-05,
      "loss": 0.4752,
      "step": 173
    },
    {
      "epoch": 0.2934232715008432,
      "grad_norm": 0.4306794411707756,
      "learning_rate": 4.8876404494382024e-05,
      "loss": 0.4906,
      "step": 174
    },
    {
      "epoch": 0.2951096121416526,
      "grad_norm": 0.5163479053494551,
      "learning_rate": 4.915730337078652e-05,
      "loss": 0.4697,
      "step": 175
    },
    {
      "epoch": 0.29679595278246207,
      "grad_norm": 0.5847728472881939,
      "learning_rate": 4.943820224719101e-05,
      "loss": 0.4947,
      "step": 176
    },
    {
      "epoch": 0.2984822934232715,
      "grad_norm": 0.4312351326528099,
      "learning_rate": 4.971910112359551e-05,
      "loss": 0.4968,
      "step": 177
    },
    {
      "epoch": 0.30016863406408095,
      "grad_norm": 0.6653668186872613,
      "learning_rate": 5e-05,
      "loss": 0.5176,
      "step": 178
    },
    {
      "epoch": 0.30185497470489037,
      "grad_norm": 0.8558829245058476,
      "learning_rate": 4.996876951905059e-05,
      "loss": 0.4898,
      "step": 179
    },
    {
      "epoch": 0.30354131534569984,
      "grad_norm": 0.4911849753770153,
      "learning_rate": 4.993753903810119e-05,
      "loss": 0.4849,
      "step": 180
    },
    {
      "epoch": 0.30522765598650925,
      "grad_norm": 0.6143613254381958,
      "learning_rate": 4.990630855715178e-05,
      "loss": 0.4593,
      "step": 181
    },
    {
      "epoch": 0.3069139966273187,
      "grad_norm": 0.6563084412266731,
      "learning_rate": 4.9875078076202377e-05,
      "loss": 0.4801,
      "step": 182
    },
    {
      "epoch": 0.3086003372681282,
      "grad_norm": 0.4457353642314339,
      "learning_rate": 4.984384759525297e-05,
      "loss": 0.4749,
      "step": 183
    },
    {
      "epoch": 0.3102866779089376,
      "grad_norm": 0.4828788641392981,
      "learning_rate": 4.9812617114303564e-05,
      "loss": 0.4797,
      "step": 184
    },
    {
      "epoch": 0.31197301854974707,
      "grad_norm": 0.5791211620153529,
      "learning_rate": 4.9781386633354154e-05,
      "loss": 0.4789,
      "step": 185
    },
    {
      "epoch": 0.3136593591905565,
      "grad_norm": 0.43012457307157376,
      "learning_rate": 4.975015615240475e-05,
      "loss": 0.4832,
      "step": 186
    },
    {
      "epoch": 0.31534569983136596,
      "grad_norm": 0.6278035890937275,
      "learning_rate": 4.971892567145534e-05,
      "loss": 0.4661,
      "step": 187
    },
    {
      "epoch": 0.31703204047217537,
      "grad_norm": 0.4457342787463174,
      "learning_rate": 4.968769519050593e-05,
      "loss": 0.4695,
      "step": 188
    },
    {
      "epoch": 0.31871838111298484,
      "grad_norm": 0.4543503799114186,
      "learning_rate": 4.965646470955653e-05,
      "loss": 0.4769,
      "step": 189
    },
    {
      "epoch": 0.32040472175379425,
      "grad_norm": 0.4948122735470389,
      "learning_rate": 4.962523422860712e-05,
      "loss": 0.4682,
      "step": 190
    },
    {
      "epoch": 0.3220910623946037,
      "grad_norm": 0.48362708618764566,
      "learning_rate": 4.959400374765772e-05,
      "loss": 0.4601,
      "step": 191
    },
    {
      "epoch": 0.32377740303541314,
      "grad_norm": 0.4130937815682131,
      "learning_rate": 4.956277326670831e-05,
      "loss": 0.4719,
      "step": 192
    },
    {
      "epoch": 0.3254637436762226,
      "grad_norm": 0.5198231324227804,
      "learning_rate": 4.95315427857589e-05,
      "loss": 0.4722,
      "step": 193
    },
    {
      "epoch": 0.327150084317032,
      "grad_norm": 0.4651378767011854,
      "learning_rate": 4.95003123048095e-05,
      "loss": 0.4535,
      "step": 194
    },
    {
      "epoch": 0.3288364249578415,
      "grad_norm": 0.39247629815392066,
      "learning_rate": 4.946908182386009e-05,
      "loss": 0.4832,
      "step": 195
    },
    {
      "epoch": 0.3305227655986509,
      "grad_norm": 0.5667870862895168,
      "learning_rate": 4.9437851342910686e-05,
      "loss": 0.4903,
      "step": 196
    },
    {
      "epoch": 0.33220910623946037,
      "grad_norm": 0.5189515974550599,
      "learning_rate": 4.9406620861961276e-05,
      "loss": 0.4922,
      "step": 197
    },
    {
      "epoch": 0.33389544688026984,
      "grad_norm": 0.41445820324605953,
      "learning_rate": 4.937539038101187e-05,
      "loss": 0.4877,
      "step": 198
    },
    {
      "epoch": 0.33558178752107926,
      "grad_norm": 0.5690605331975456,
      "learning_rate": 4.9344159900062464e-05,
      "loss": 0.4861,
      "step": 199
    },
    {
      "epoch": 0.3372681281618887,
      "grad_norm": 0.43127216298664417,
      "learning_rate": 4.931292941911306e-05,
      "loss": 0.4736,
      "step": 200
    },
    {
      "epoch": 0.33895446880269814,
      "grad_norm": 0.5088487302873465,
      "learning_rate": 4.928169893816365e-05,
      "loss": 0.4643,
      "step": 201
    },
    {
      "epoch": 0.3406408094435076,
      "grad_norm": 0.4022315371323236,
      "learning_rate": 4.925046845721424e-05,
      "loss": 0.4712,
      "step": 202
    },
    {
      "epoch": 0.342327150084317,
      "grad_norm": 0.5435438730616811,
      "learning_rate": 4.921923797626484e-05,
      "loss": 0.4883,
      "step": 203
    },
    {
      "epoch": 0.3440134907251265,
      "grad_norm": 0.5647135768046861,
      "learning_rate": 4.918800749531543e-05,
      "loss": 0.4874,
      "step": 204
    },
    {
      "epoch": 0.3456998313659359,
      "grad_norm": 0.3975530293289323,
      "learning_rate": 4.9156777014366025e-05,
      "loss": 0.4824,
      "step": 205
    },
    {
      "epoch": 0.3473861720067454,
      "grad_norm": 0.6242457799816916,
      "learning_rate": 4.9125546533416615e-05,
      "loss": 0.47,
      "step": 206
    },
    {
      "epoch": 0.3490725126475548,
      "grad_norm": 0.5011673819192746,
      "learning_rate": 4.909431605246721e-05,
      "loss": 0.4678,
      "step": 207
    },
    {
      "epoch": 0.35075885328836426,
      "grad_norm": 0.41505977699214663,
      "learning_rate": 4.90630855715178e-05,
      "loss": 0.4623,
      "step": 208
    },
    {
      "epoch": 0.3524451939291737,
      "grad_norm": 0.6222052645158483,
      "learning_rate": 4.90318550905684e-05,
      "loss": 0.4939,
      "step": 209
    },
    {
      "epoch": 0.35413153456998314,
      "grad_norm": 0.4163461583203549,
      "learning_rate": 4.900062460961899e-05,
      "loss": 0.4708,
      "step": 210
    },
    {
      "epoch": 0.35581787521079256,
      "grad_norm": 0.6235836351033357,
      "learning_rate": 4.896939412866958e-05,
      "loss": 0.462,
      "step": 211
    },
    {
      "epoch": 0.357504215851602,
      "grad_norm": 0.3903487157898828,
      "learning_rate": 4.8938163647720176e-05,
      "loss": 0.4799,
      "step": 212
    },
    {
      "epoch": 0.3591905564924115,
      "grad_norm": 0.5806378172072378,
      "learning_rate": 4.8906933166770766e-05,
      "loss": 0.4917,
      "step": 213
    },
    {
      "epoch": 0.3608768971332209,
      "grad_norm": 0.3985626907622682,
      "learning_rate": 4.887570268582136e-05,
      "loss": 0.4815,
      "step": 214
    },
    {
      "epoch": 0.3625632377740304,
      "grad_norm": 0.536198940838733,
      "learning_rate": 4.8844472204871954e-05,
      "loss": 0.4729,
      "step": 215
    },
    {
      "epoch": 0.3642495784148398,
      "grad_norm": 0.4329558877769293,
      "learning_rate": 4.881324172392255e-05,
      "loss": 0.4825,
      "step": 216
    },
    {
      "epoch": 0.36593591905564926,
      "grad_norm": 0.4094546659846234,
      "learning_rate": 4.878201124297315e-05,
      "loss": 0.4709,
      "step": 217
    },
    {
      "epoch": 0.3676222596964587,
      "grad_norm": 0.4900231997961044,
      "learning_rate": 4.875078076202374e-05,
      "loss": 0.4806,
      "step": 218
    },
    {
      "epoch": 0.36930860033726814,
      "grad_norm": 1.3182340985233345,
      "learning_rate": 4.8719550281074335e-05,
      "loss": 0.4983,
      "step": 219
    },
    {
      "epoch": 0.37099494097807756,
      "grad_norm": 0.5623706911628698,
      "learning_rate": 4.8688319800124925e-05,
      "loss": 0.4746,
      "step": 220
    },
    {
      "epoch": 0.37268128161888703,
      "grad_norm": 0.7551184358602725,
      "learning_rate": 4.865708931917552e-05,
      "loss": 0.4876,
      "step": 221
    },
    {
      "epoch": 0.37436762225969644,
      "grad_norm": 0.5124638039352849,
      "learning_rate": 4.862585883822611e-05,
      "loss": 0.4817,
      "step": 222
    },
    {
      "epoch": 0.3760539629005059,
      "grad_norm": 0.5748662022626354,
      "learning_rate": 4.859462835727671e-05,
      "loss": 0.4873,
      "step": 223
    },
    {
      "epoch": 0.3777403035413153,
      "grad_norm": 0.4995856424465425,
      "learning_rate": 4.85633978763273e-05,
      "loss": 0.4841,
      "step": 224
    },
    {
      "epoch": 0.3794266441821248,
      "grad_norm": 0.5492555776305382,
      "learning_rate": 4.853216739537789e-05,
      "loss": 0.4572,
      "step": 225
    },
    {
      "epoch": 0.3811129848229342,
      "grad_norm": 0.6217180425514838,
      "learning_rate": 4.8500936914428486e-05,
      "loss": 0.4873,
      "step": 226
    },
    {
      "epoch": 0.3827993254637437,
      "grad_norm": 0.753543701418373,
      "learning_rate": 4.8469706433479076e-05,
      "loss": 0.4677,
      "step": 227
    },
    {
      "epoch": 0.3844856661045531,
      "grad_norm": 0.39884775752702056,
      "learning_rate": 4.843847595252967e-05,
      "loss": 0.4853,
      "step": 228
    },
    {
      "epoch": 0.38617200674536256,
      "grad_norm": 0.8356414158467076,
      "learning_rate": 4.840724547158026e-05,
      "loss": 0.4842,
      "step": 229
    },
    {
      "epoch": 0.38785834738617203,
      "grad_norm": 0.4690693833719156,
      "learning_rate": 4.837601499063086e-05,
      "loss": 0.461,
      "step": 230
    },
    {
      "epoch": 0.38954468802698144,
      "grad_norm": 0.5808095607024085,
      "learning_rate": 4.834478450968145e-05,
      "loss": 0.4627,
      "step": 231
    },
    {
      "epoch": 0.3912310286677909,
      "grad_norm": 0.9887565739758644,
      "learning_rate": 4.831355402873205e-05,
      "loss": 0.5001,
      "step": 232
    },
    {
      "epoch": 0.39291736930860033,
      "grad_norm": 0.3624233562603427,
      "learning_rate": 4.828232354778264e-05,
      "loss": 0.4391,
      "step": 233
    },
    {
      "epoch": 0.3946037099494098,
      "grad_norm": 0.5710016977364857,
      "learning_rate": 4.825109306683323e-05,
      "loss": 0.4783,
      "step": 234
    },
    {
      "epoch": 0.3962900505902192,
      "grad_norm": 0.40833038130567695,
      "learning_rate": 4.8219862585883825e-05,
      "loss": 0.4642,
      "step": 235
    },
    {
      "epoch": 0.3979763912310287,
      "grad_norm": 0.47027421121390883,
      "learning_rate": 4.8188632104934415e-05,
      "loss": 0.4698,
      "step": 236
    },
    {
      "epoch": 0.3996627318718381,
      "grad_norm": 0.4210977303820093,
      "learning_rate": 4.815740162398501e-05,
      "loss": 0.4651,
      "step": 237
    },
    {
      "epoch": 0.40134907251264756,
      "grad_norm": 0.43664310696121306,
      "learning_rate": 4.81261711430356e-05,
      "loss": 0.4669,
      "step": 238
    },
    {
      "epoch": 0.403035413153457,
      "grad_norm": 0.5887659356197822,
      "learning_rate": 4.80949406620862e-05,
      "loss": 0.4751,
      "step": 239
    },
    {
      "epoch": 0.40472175379426645,
      "grad_norm": 0.4374894796331959,
      "learning_rate": 4.806371018113679e-05,
      "loss": 0.4532,
      "step": 240
    },
    {
      "epoch": 0.40640809443507586,
      "grad_norm": 0.5253351044852949,
      "learning_rate": 4.8032479700187386e-05,
      "loss": 0.471,
      "step": 241
    },
    {
      "epoch": 0.40809443507588533,
      "grad_norm": 0.45194720426123525,
      "learning_rate": 4.8001249219237976e-05,
      "loss": 0.4981,
      "step": 242
    },
    {
      "epoch": 0.40978077571669475,
      "grad_norm": 0.4873740274970084,
      "learning_rate": 4.797001873828857e-05,
      "loss": 0.4761,
      "step": 243
    },
    {
      "epoch": 0.4114671163575042,
      "grad_norm": 0.464630536061398,
      "learning_rate": 4.793878825733917e-05,
      "loss": 0.4561,
      "step": 244
    },
    {
      "epoch": 0.4131534569983137,
      "grad_norm": 0.386296619575784,
      "learning_rate": 4.790755777638976e-05,
      "loss": 0.4658,
      "step": 245
    },
    {
      "epoch": 0.4148397976391231,
      "grad_norm": 0.5513350789766529,
      "learning_rate": 4.787632729544036e-05,
      "loss": 0.4735,
      "step": 246
    },
    {
      "epoch": 0.41652613827993257,
      "grad_norm": 0.39229874724094077,
      "learning_rate": 4.784509681449095e-05,
      "loss": 0.4645,
      "step": 247
    },
    {
      "epoch": 0.418212478920742,
      "grad_norm": 0.6182543773392607,
      "learning_rate": 4.781386633354154e-05,
      "loss": 0.4798,
      "step": 248
    },
    {
      "epoch": 0.41989881956155145,
      "grad_norm": 0.5517604855595091,
      "learning_rate": 4.7782635852592134e-05,
      "loss": 0.4535,
      "step": 249
    },
    {
      "epoch": 0.42158516020236086,
      "grad_norm": 0.44676355561808895,
      "learning_rate": 4.7751405371642724e-05,
      "loss": 0.4565,
      "step": 250
    },
    {
      "epoch": 0.42327150084317033,
      "grad_norm": 0.545302526098748,
      "learning_rate": 4.772017489069332e-05,
      "loss": 0.4518,
      "step": 251
    },
    {
      "epoch": 0.42495784148397975,
      "grad_norm": 0.38455720972294166,
      "learning_rate": 4.768894440974391e-05,
      "loss": 0.4812,
      "step": 252
    },
    {
      "epoch": 0.4266441821247892,
      "grad_norm": 0.6308884761349483,
      "learning_rate": 4.765771392879451e-05,
      "loss": 0.4763,
      "step": 253
    },
    {
      "epoch": 0.42833052276559863,
      "grad_norm": 0.4252859223489624,
      "learning_rate": 4.76264834478451e-05,
      "loss": 0.4708,
      "step": 254
    },
    {
      "epoch": 0.4300168634064081,
      "grad_norm": 0.4588786362077238,
      "learning_rate": 4.7595252966895696e-05,
      "loss": 0.4524,
      "step": 255
    },
    {
      "epoch": 0.4317032040472175,
      "grad_norm": 0.5056357623998745,
      "learning_rate": 4.7564022485946286e-05,
      "loss": 0.4857,
      "step": 256
    },
    {
      "epoch": 0.433389544688027,
      "grad_norm": 0.36885042655172334,
      "learning_rate": 4.7532792004996876e-05,
      "loss": 0.4532,
      "step": 257
    },
    {
      "epoch": 0.4350758853288364,
      "grad_norm": 0.47099160176857435,
      "learning_rate": 4.750156152404747e-05,
      "loss": 0.4621,
      "step": 258
    },
    {
      "epoch": 0.43676222596964587,
      "grad_norm": 0.4003081993445989,
      "learning_rate": 4.747033104309806e-05,
      "loss": 0.4538,
      "step": 259
    },
    {
      "epoch": 0.43844856661045534,
      "grad_norm": 0.34772689545249436,
      "learning_rate": 4.743910056214866e-05,
      "loss": 0.4617,
      "step": 260
    },
    {
      "epoch": 0.44013490725126475,
      "grad_norm": 0.5217549125371695,
      "learning_rate": 4.740787008119925e-05,
      "loss": 0.4809,
      "step": 261
    },
    {
      "epoch": 0.4418212478920742,
      "grad_norm": 0.3865859039529573,
      "learning_rate": 4.737663960024985e-05,
      "loss": 0.4723,
      "step": 262
    },
    {
      "epoch": 0.44350758853288363,
      "grad_norm": 0.41060711895278124,
      "learning_rate": 4.734540911930044e-05,
      "loss": 0.4633,
      "step": 263
    },
    {
      "epoch": 0.4451939291736931,
      "grad_norm": 0.35590597776520944,
      "learning_rate": 4.7314178638351034e-05,
      "loss": 0.4743,
      "step": 264
    },
    {
      "epoch": 0.4468802698145025,
      "grad_norm": 0.44077025461728286,
      "learning_rate": 4.7282948157401624e-05,
      "loss": 0.4618,
      "step": 265
    },
    {
      "epoch": 0.448566610455312,
      "grad_norm": 0.3914683036988655,
      "learning_rate": 4.7251717676452214e-05,
      "loss": 0.4837,
      "step": 266
    },
    {
      "epoch": 0.4502529510961214,
      "grad_norm": 0.39989639702012575,
      "learning_rate": 4.722048719550281e-05,
      "loss": 0.473,
      "step": 267
    },
    {
      "epoch": 0.45193929173693087,
      "grad_norm": 0.35245741880842596,
      "learning_rate": 4.71892567145534e-05,
      "loss": 0.4687,
      "step": 268
    },
    {
      "epoch": 0.4536256323777403,
      "grad_norm": 0.4526741581262521,
      "learning_rate": 4.7158026233604005e-05,
      "loss": 0.4515,
      "step": 269
    },
    {
      "epoch": 0.45531197301854975,
      "grad_norm": 0.365110364415945,
      "learning_rate": 4.7126795752654595e-05,
      "loss": 0.47,
      "step": 270
    },
    {
      "epoch": 0.45699831365935917,
      "grad_norm": 0.46637818011624854,
      "learning_rate": 4.7095565271705186e-05,
      "loss": 0.4474,
      "step": 271
    },
    {
      "epoch": 0.45868465430016864,
      "grad_norm": 0.44190547233976,
      "learning_rate": 4.706433479075578e-05,
      "loss": 0.4803,
      "step": 272
    },
    {
      "epoch": 0.46037099494097805,
      "grad_norm": 0.4875433864770596,
      "learning_rate": 4.703310430980637e-05,
      "loss": 0.4593,
      "step": 273
    },
    {
      "epoch": 0.4620573355817875,
      "grad_norm": 0.39643388270510754,
      "learning_rate": 4.700187382885697e-05,
      "loss": 0.4669,
      "step": 274
    },
    {
      "epoch": 0.463743676222597,
      "grad_norm": 0.5625953425266915,
      "learning_rate": 4.697064334790756e-05,
      "loss": 0.444,
      "step": 275
    },
    {
      "epoch": 0.4654300168634064,
      "grad_norm": 0.3682394173749922,
      "learning_rate": 4.693941286695816e-05,
      "loss": 0.4582,
      "step": 276
    },
    {
      "epoch": 0.4671163575042159,
      "grad_norm": 0.5666864728568041,
      "learning_rate": 4.690818238600875e-05,
      "loss": 0.4627,
      "step": 277
    },
    {
      "epoch": 0.4688026981450253,
      "grad_norm": 0.4064111657157596,
      "learning_rate": 4.6876951905059344e-05,
      "loss": 0.4824,
      "step": 278
    },
    {
      "epoch": 0.47048903878583476,
      "grad_norm": 0.45845078406278744,
      "learning_rate": 4.6845721424109934e-05,
      "loss": 0.4478,
      "step": 279
    },
    {
      "epoch": 0.47217537942664417,
      "grad_norm": 0.548590898565881,
      "learning_rate": 4.6814490943160524e-05,
      "loss": 0.4594,
      "step": 280
    },
    {
      "epoch": 0.47386172006745364,
      "grad_norm": 0.3886498640345789,
      "learning_rate": 4.678326046221112e-05,
      "loss": 0.4453,
      "step": 281
    },
    {
      "epoch": 0.47554806070826305,
      "grad_norm": 0.5103744392911466,
      "learning_rate": 4.675202998126171e-05,
      "loss": 0.4448,
      "step": 282
    },
    {
      "epoch": 0.4772344013490725,
      "grad_norm": 0.4518474291293266,
      "learning_rate": 4.672079950031231e-05,
      "loss": 0.46,
      "step": 283
    },
    {
      "epoch": 0.47892074198988194,
      "grad_norm": 0.5456343340353047,
      "learning_rate": 4.66895690193629e-05,
      "loss": 0.4566,
      "step": 284
    },
    {
      "epoch": 0.4806070826306914,
      "grad_norm": 0.4566447835972022,
      "learning_rate": 4.6658338538413495e-05,
      "loss": 0.4603,
      "step": 285
    },
    {
      "epoch": 0.4822934232715008,
      "grad_norm": 0.4746694572858959,
      "learning_rate": 4.6627108057464085e-05,
      "loss": 0.4512,
      "step": 286
    },
    {
      "epoch": 0.4839797639123103,
      "grad_norm": 0.5188723656216718,
      "learning_rate": 4.659587757651468e-05,
      "loss": 0.4825,
      "step": 287
    },
    {
      "epoch": 0.4856661045531197,
      "grad_norm": 0.4402398441258149,
      "learning_rate": 4.656464709556527e-05,
      "loss": 0.4304,
      "step": 288
    },
    {
      "epoch": 0.4873524451939292,
      "grad_norm": 0.5613408976197366,
      "learning_rate": 4.653341661461586e-05,
      "loss": 0.4701,
      "step": 289
    },
    {
      "epoch": 0.48903878583473864,
      "grad_norm": 0.5401648721182448,
      "learning_rate": 4.650218613366646e-05,
      "loss": 0.4551,
      "step": 290
    },
    {
      "epoch": 0.49072512647554806,
      "grad_norm": 0.5791278786888271,
      "learning_rate": 4.647095565271705e-05,
      "loss": 0.4511,
      "step": 291
    },
    {
      "epoch": 0.4924114671163575,
      "grad_norm": 0.5246744706582873,
      "learning_rate": 4.643972517176765e-05,
      "loss": 0.4549,
      "step": 292
    },
    {
      "epoch": 0.49409780775716694,
      "grad_norm": 0.7164081142152631,
      "learning_rate": 4.640849469081824e-05,
      "loss": 0.4674,
      "step": 293
    },
    {
      "epoch": 0.4957841483979764,
      "grad_norm": 0.40548449726070296,
      "learning_rate": 4.6377264209868834e-05,
      "loss": 0.4571,
      "step": 294
    },
    {
      "epoch": 0.4974704890387858,
      "grad_norm": 0.6593403514913315,
      "learning_rate": 4.634603372891943e-05,
      "loss": 0.462,
      "step": 295
    },
    {
      "epoch": 0.4991568296795953,
      "grad_norm": 0.39387605629333194,
      "learning_rate": 4.631480324797002e-05,
      "loss": 0.4535,
      "step": 296
    },
    {
      "epoch": 0.5008431703204047,
      "grad_norm": 0.6989949061990584,
      "learning_rate": 4.628357276702062e-05,
      "loss": 0.4548,
      "step": 297
    },
    {
      "epoch": 0.5025295109612141,
      "grad_norm": 0.5728394753919458,
      "learning_rate": 4.625234228607121e-05,
      "loss": 0.4644,
      "step": 298
    },
    {
      "epoch": 0.5042158516020236,
      "grad_norm": 0.5792715131221312,
      "learning_rate": 4.6221111805121805e-05,
      "loss": 0.4451,
      "step": 299
    },
    {
      "epoch": 0.5059021922428331,
      "grad_norm": 0.4586042579159975,
      "learning_rate": 4.6189881324172395e-05,
      "loss": 0.4397,
      "step": 300
    },
    {
      "epoch": 0.5075885328836425,
      "grad_norm": 0.5912133606348081,
      "learning_rate": 4.615865084322299e-05,
      "loss": 0.4581,
      "step": 301
    },
    {
      "epoch": 0.5092748735244519,
      "grad_norm": 0.4840692893259534,
      "learning_rate": 4.612742036227358e-05,
      "loss": 0.47,
      "step": 302
    },
    {
      "epoch": 0.5109612141652614,
      "grad_norm": 0.628029656964694,
      "learning_rate": 4.609618988132417e-05,
      "loss": 0.4657,
      "step": 303
    },
    {
      "epoch": 0.5126475548060708,
      "grad_norm": 0.5119987303703423,
      "learning_rate": 4.606495940037477e-05,
      "loss": 0.45,
      "step": 304
    },
    {
      "epoch": 0.5143338954468802,
      "grad_norm": 0.5538879969713244,
      "learning_rate": 4.603372891942536e-05,
      "loss": 0.4494,
      "step": 305
    },
    {
      "epoch": 0.5160202360876898,
      "grad_norm": 0.6059271546930384,
      "learning_rate": 4.6002498438475956e-05,
      "loss": 0.451,
      "step": 306
    },
    {
      "epoch": 0.5177065767284992,
      "grad_norm": 0.40827887502082827,
      "learning_rate": 4.5971267957526547e-05,
      "loss": 0.4539,
      "step": 307
    },
    {
      "epoch": 0.5193929173693086,
      "grad_norm": 0.5394798456701918,
      "learning_rate": 4.5940037476577143e-05,
      "loss": 0.4657,
      "step": 308
    },
    {
      "epoch": 0.521079258010118,
      "grad_norm": 0.37887469267167845,
      "learning_rate": 4.5908806995627734e-05,
      "loss": 0.4708,
      "step": 309
    },
    {
      "epoch": 0.5227655986509275,
      "grad_norm": 0.47009884438847327,
      "learning_rate": 4.587757651467833e-05,
      "loss": 0.4557,
      "step": 310
    },
    {
      "epoch": 0.524451939291737,
      "grad_norm": 0.4408946597680707,
      "learning_rate": 4.584634603372892e-05,
      "loss": 0.454,
      "step": 311
    },
    {
      "epoch": 0.5261382799325464,
      "grad_norm": 0.43048975043678045,
      "learning_rate": 4.581511555277951e-05,
      "loss": 0.4565,
      "step": 312
    },
    {
      "epoch": 0.5278246205733558,
      "grad_norm": 0.5519067163880373,
      "learning_rate": 4.578388507183011e-05,
      "loss": 0.4584,
      "step": 313
    },
    {
      "epoch": 0.5295109612141653,
      "grad_norm": 0.4933302641718265,
      "learning_rate": 4.57526545908807e-05,
      "loss": 0.4631,
      "step": 314
    },
    {
      "epoch": 0.5311973018549747,
      "grad_norm": 0.4571212964029398,
      "learning_rate": 4.5721424109931295e-05,
      "loss": 0.4403,
      "step": 315
    },
    {
      "epoch": 0.5328836424957841,
      "grad_norm": 0.457978654022723,
      "learning_rate": 4.5690193628981885e-05,
      "loss": 0.4567,
      "step": 316
    },
    {
      "epoch": 0.5345699831365935,
      "grad_norm": 0.4789628002820523,
      "learning_rate": 4.565896314803248e-05,
      "loss": 0.4647,
      "step": 317
    },
    {
      "epoch": 0.5362563237774031,
      "grad_norm": 0.5030611987631772,
      "learning_rate": 4.562773266708307e-05,
      "loss": 0.4472,
      "step": 318
    },
    {
      "epoch": 0.5379426644182125,
      "grad_norm": 0.537594026859627,
      "learning_rate": 4.559650218613367e-05,
      "loss": 0.4537,
      "step": 319
    },
    {
      "epoch": 0.5396290050590219,
      "grad_norm": 0.545365577796258,
      "learning_rate": 4.556527170518426e-05,
      "loss": 0.452,
      "step": 320
    },
    {
      "epoch": 0.5413153456998314,
      "grad_norm": 0.5272307911586873,
      "learning_rate": 4.5534041224234856e-05,
      "loss": 0.4507,
      "step": 321
    },
    {
      "epoch": 0.5430016863406408,
      "grad_norm": 0.43739398109220595,
      "learning_rate": 4.550281074328545e-05,
      "loss": 0.4607,
      "step": 322
    },
    {
      "epoch": 0.5446880269814502,
      "grad_norm": 0.5883556732443432,
      "learning_rate": 4.547158026233604e-05,
      "loss": 0.4674,
      "step": 323
    },
    {
      "epoch": 0.5463743676222597,
      "grad_norm": 0.4259537371040564,
      "learning_rate": 4.544034978138664e-05,
      "loss": 0.4711,
      "step": 324
    },
    {
      "epoch": 0.5480607082630692,
      "grad_norm": 0.47670365038826346,
      "learning_rate": 4.540911930043723e-05,
      "loss": 0.453,
      "step": 325
    },
    {
      "epoch": 0.5497470489038786,
      "grad_norm": 0.3898528313656299,
      "learning_rate": 4.537788881948782e-05,
      "loss": 0.4404,
      "step": 326
    },
    {
      "epoch": 0.551433389544688,
      "grad_norm": 0.39330434537215003,
      "learning_rate": 4.534665833853842e-05,
      "loss": 0.4659,
      "step": 327
    },
    {
      "epoch": 0.5531197301854974,
      "grad_norm": 0.5271047700192718,
      "learning_rate": 4.531542785758901e-05,
      "loss": 0.4679,
      "step": 328
    },
    {
      "epoch": 0.554806070826307,
      "grad_norm": 0.43702194412823087,
      "learning_rate": 4.5284197376639605e-05,
      "loss": 0.4458,
      "step": 329
    },
    {
      "epoch": 0.5564924114671164,
      "grad_norm": 0.4942049663231375,
      "learning_rate": 4.5252966895690195e-05,
      "loss": 0.4782,
      "step": 330
    },
    {
      "epoch": 0.5581787521079258,
      "grad_norm": 0.47280783766806017,
      "learning_rate": 4.522173641474079e-05,
      "loss": 0.469,
      "step": 331
    },
    {
      "epoch": 0.5598650927487352,
      "grad_norm": 0.49600299133397724,
      "learning_rate": 4.519050593379138e-05,
      "loss": 0.4442,
      "step": 332
    },
    {
      "epoch": 0.5615514333895447,
      "grad_norm": 0.3778585896882259,
      "learning_rate": 4.515927545284198e-05,
      "loss": 0.4349,
      "step": 333
    },
    {
      "epoch": 0.5632377740303541,
      "grad_norm": 0.6634881870711851,
      "learning_rate": 4.512804497189257e-05,
      "loss": 0.4493,
      "step": 334
    },
    {
      "epoch": 0.5649241146711635,
      "grad_norm": 0.3529106630733103,
      "learning_rate": 4.509681449094316e-05,
      "loss": 0.4597,
      "step": 335
    },
    {
      "epoch": 0.5666104553119731,
      "grad_norm": 0.5092975219112303,
      "learning_rate": 4.5065584009993756e-05,
      "loss": 0.4467,
      "step": 336
    },
    {
      "epoch": 0.5682967959527825,
      "grad_norm": 0.4391198480506805,
      "learning_rate": 4.5034353529044346e-05,
      "loss": 0.4532,
      "step": 337
    },
    {
      "epoch": 0.5699831365935919,
      "grad_norm": 0.5198814283532447,
      "learning_rate": 4.500312304809494e-05,
      "loss": 0.4421,
      "step": 338
    },
    {
      "epoch": 0.5716694772344013,
      "grad_norm": 0.46667997995117966,
      "learning_rate": 4.497189256714553e-05,
      "loss": 0.4549,
      "step": 339
    },
    {
      "epoch": 0.5733558178752108,
      "grad_norm": 0.4975478516344748,
      "learning_rate": 4.494066208619613e-05,
      "loss": 0.4443,
      "step": 340
    },
    {
      "epoch": 0.5750421585160203,
      "grad_norm": 0.3894373802203641,
      "learning_rate": 4.490943160524672e-05,
      "loss": 0.4493,
      "step": 341
    },
    {
      "epoch": 0.5767284991568297,
      "grad_norm": 0.4992453837229512,
      "learning_rate": 4.487820112429732e-05,
      "loss": 0.4429,
      "step": 342
    },
    {
      "epoch": 0.5784148397976391,
      "grad_norm": 0.3647456589580748,
      "learning_rate": 4.484697064334791e-05,
      "loss": 0.4382,
      "step": 343
    },
    {
      "epoch": 0.5801011804384486,
      "grad_norm": 0.4516326357024824,
      "learning_rate": 4.48157401623985e-05,
      "loss": 0.4705,
      "step": 344
    },
    {
      "epoch": 0.581787521079258,
      "grad_norm": 0.35544882261647254,
      "learning_rate": 4.4784509681449095e-05,
      "loss": 0.4627,
      "step": 345
    },
    {
      "epoch": 0.5834738617200674,
      "grad_norm": 0.37857935514451707,
      "learning_rate": 4.475327920049969e-05,
      "loss": 0.4502,
      "step": 346
    },
    {
      "epoch": 0.5851602023608768,
      "grad_norm": 0.3829292936307298,
      "learning_rate": 4.472204871955029e-05,
      "loss": 0.4521,
      "step": 347
    },
    {
      "epoch": 0.5868465430016864,
      "grad_norm": 0.3960422907404094,
      "learning_rate": 4.469081823860088e-05,
      "loss": 0.4592,
      "step": 348
    },
    {
      "epoch": 0.5885328836424958,
      "grad_norm": 0.5180788477319964,
      "learning_rate": 4.465958775765147e-05,
      "loss": 0.4622,
      "step": 349
    },
    {
      "epoch": 0.5902192242833052,
      "grad_norm": 0.383759399659539,
      "learning_rate": 4.4628357276702066e-05,
      "loss": 0.4565,
      "step": 350
    },
| { |
| "epoch": 0.5919055649241147, |
| "grad_norm": 0.4755348626514461, |
| "learning_rate": 4.4597126795752656e-05, |
| "loss": 0.461, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.5935919055649241, |
| "grad_norm": 0.40158049133043633, |
| "learning_rate": 4.456589631480325e-05, |
| "loss": 0.4537, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5952782462057336, |
| "grad_norm": 0.4375462386612478, |
| "learning_rate": 4.453466583385384e-05, |
| "loss": 0.452, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.596964586846543, |
| "grad_norm": 0.4109536436881464, |
| "learning_rate": 4.450343535290444e-05, |
| "loss": 0.4544, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.5986509274873525, |
| "grad_norm": 0.4129498203971123, |
| "learning_rate": 4.447220487195503e-05, |
| "loss": 0.4541, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6003372681281619, |
| "grad_norm": 0.3642496853825518, |
| "learning_rate": 4.444097439100563e-05, |
| "loss": 0.4276, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.6020236087689713, |
| "grad_norm": 0.3885225374210196, |
| "learning_rate": 4.440974391005622e-05, |
| "loss": 0.4401, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.6037099494097807, |
| "grad_norm": 0.4330664385703508, |
| "learning_rate": 4.437851342910681e-05, |
| "loss": 0.4631, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.6053962900505903, |
| "grad_norm": 0.41560861442665614, |
| "learning_rate": 4.4347282948157404e-05, |
| "loss": 0.4576, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.6070826306913997, |
| "grad_norm": 0.36879647476749433, |
| "learning_rate": 4.4316052467207994e-05, |
| "loss": 0.4561, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6087689713322091, |
| "grad_norm": 0.43808306623569127, |
| "learning_rate": 4.428482198625859e-05, |
| "loss": 0.4575, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.6104553119730185, |
| "grad_norm": 0.3525734460941292, |
| "learning_rate": 4.425359150530918e-05, |
| "loss": 0.4326, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.612141652613828, |
| "grad_norm": 0.45284917798855845, |
| "learning_rate": 4.422236102435978e-05, |
| "loss": 0.4624, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.6138279932546374, |
| "grad_norm": 0.42565642217457994, |
| "learning_rate": 4.419113054341037e-05, |
| "loss": 0.4602, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.6155143338954469, |
| "grad_norm": 0.40695683541052846, |
| "learning_rate": 4.4159900062460966e-05, |
| "loss": 0.4602, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.6172006745362564, |
| "grad_norm": 0.39479177428957435, |
| "learning_rate": 4.4128669581511556e-05, |
| "loss": 0.4303, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.6188870151770658, |
| "grad_norm": 0.4103584712960603, |
| "learning_rate": 4.4097439100562146e-05, |
| "loss": 0.4641, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.6205733558178752, |
| "grad_norm": 0.40114012742268623, |
| "learning_rate": 4.406620861961274e-05, |
| "loss": 0.4543, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.6222596964586846, |
| "grad_norm": 0.4671847767961667, |
| "learning_rate": 4.403497813866333e-05, |
| "loss": 0.4576, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.6239460370994941, |
| "grad_norm": 0.4214940061838059, |
| "learning_rate": 4.400374765771393e-05, |
| "loss": 0.4655, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6256323777403036, |
| "grad_norm": 0.4036379096633672, |
| "learning_rate": 4.397251717676452e-05, |
| "loss": 0.4621, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.627318718381113, |
| "grad_norm": 0.43480686992150475, |
| "learning_rate": 4.394128669581512e-05, |
| "loss": 0.4472, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.6290050590219224, |
| "grad_norm": 0.34172767802821746, |
| "learning_rate": 4.3910056214865714e-05, |
| "loss": 0.44, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.6306913996627319, |
| "grad_norm": 0.4519665422947456, |
| "learning_rate": 4.3878825733916304e-05, |
| "loss": 0.4242, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.6323777403035413, |
| "grad_norm": 0.33989642815100785, |
| "learning_rate": 4.38475952529669e-05, |
| "loss": 0.4621, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.6340640809443507, |
| "grad_norm": 0.5051501147469363, |
| "learning_rate": 4.381636477201749e-05, |
| "loss": 0.447, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.6357504215851602, |
| "grad_norm": 0.3114342404571123, |
| "learning_rate": 4.378513429106809e-05, |
| "loss": 0.4605, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.6374367622259697, |
| "grad_norm": 0.46355812533549084, |
| "learning_rate": 4.375390381011868e-05, |
| "loss": 0.4493, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.6391231028667791, |
| "grad_norm": 0.34338903987416625, |
| "learning_rate": 4.3722673329169275e-05, |
| "loss": 0.4284, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.6408094435075885, |
| "grad_norm": 0.3546403564873265, |
| "learning_rate": 4.3691442848219865e-05, |
| "loss": 0.4304, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.642495784148398, |
| "grad_norm": 0.3342737589074633, |
| "learning_rate": 4.3660212367270456e-05, |
| "loss": 0.4511, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.6441821247892074, |
| "grad_norm": 0.38340072496867783, |
| "learning_rate": 4.362898188632105e-05, |
| "loss": 0.4739, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.6458684654300169, |
| "grad_norm": 0.3188919441604891, |
| "learning_rate": 4.359775140537164e-05, |
| "loss": 0.4413, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.6475548060708263, |
| "grad_norm": 0.40349853629853805, |
| "learning_rate": 4.356652092442224e-05, |
| "loss": 0.4294, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.6492411467116358, |
| "grad_norm": 0.3418001692662577, |
| "learning_rate": 4.353529044347283e-05, |
| "loss": 0.4579, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.6509274873524452, |
| "grad_norm": 0.4179039862512628, |
| "learning_rate": 4.350405996252343e-05, |
| "loss": 0.4718, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.6526138279932546, |
| "grad_norm": 0.3656172176030232, |
| "learning_rate": 4.347282948157402e-05, |
| "loss": 0.4473, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.654300168634064, |
| "grad_norm": 0.3569964633246004, |
| "learning_rate": 4.3441599000624614e-05, |
| "loss": 0.4447, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.6559865092748736, |
| "grad_norm": 0.43024795155564693, |
| "learning_rate": 4.3410368519675204e-05, |
| "loss": 0.4364, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.657672849915683, |
| "grad_norm": 0.3957910007344399, |
| "learning_rate": 4.3379138038725794e-05, |
| "loss": 0.4549, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6593591905564924, |
| "grad_norm": 0.4205619540503897, |
| "learning_rate": 4.334790755777639e-05, |
| "loss": 0.4265, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.6610455311973018, |
| "grad_norm": 0.3131668807477272, |
| "learning_rate": 4.331667707682698e-05, |
| "loss": 0.439, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.6627318718381113, |
| "grad_norm": 0.4493871641528794, |
| "learning_rate": 4.328544659587758e-05, |
| "loss": 0.4548, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.6644182124789207, |
| "grad_norm": 0.33758428285308034, |
| "learning_rate": 4.325421611492817e-05, |
| "loss": 0.4636, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.6661045531197302, |
| "grad_norm": 0.42168546778015187, |
| "learning_rate": 4.3222985633978765e-05, |
| "loss": 0.4327, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.6677908937605397, |
| "grad_norm": 0.3204127281970423, |
| "learning_rate": 4.3191755153029355e-05, |
| "loss": 0.4508, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.6694772344013491, |
| "grad_norm": 0.35319093847245864, |
| "learning_rate": 4.316052467207995e-05, |
| "loss": 0.4475, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.6711635750421585, |
| "grad_norm": 0.3376608430123609, |
| "learning_rate": 4.312929419113055e-05, |
| "loss": 0.459, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.6728499156829679, |
| "grad_norm": 0.41689185294932124, |
| "learning_rate": 4.309806371018114e-05, |
| "loss": 0.4469, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.6745362563237775, |
| "grad_norm": 0.30877204324968244, |
| "learning_rate": 4.3066833229231736e-05, |
| "loss": 0.4435, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6762225969645869, |
| "grad_norm": 0.37792173543475427, |
| "learning_rate": 4.3035602748282327e-05, |
| "loss": 0.4452, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.6779089376053963, |
| "grad_norm": 0.3780637092799853, |
| "learning_rate": 4.3004372267332924e-05, |
| "loss": 0.4491, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.6795952782462057, |
| "grad_norm": 0.3753855323716615, |
| "learning_rate": 4.2973141786383514e-05, |
| "loss": 0.4597, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.6812816188870152, |
| "grad_norm": 0.3547964801934976, |
| "learning_rate": 4.2941911305434104e-05, |
| "loss": 0.4471, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.6829679595278246, |
| "grad_norm": 0.37366732354500176, |
| "learning_rate": 4.29106808244847e-05, |
| "loss": 0.4285, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.684654300168634, |
| "grad_norm": 0.3527414732450224, |
| "learning_rate": 4.287945034353529e-05, |
| "loss": 0.4419, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.6863406408094435, |
| "grad_norm": 0.36437242290240707, |
| "learning_rate": 4.284821986258589e-05, |
| "loss": 0.4323, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.688026981450253, |
| "grad_norm": 0.3674294362180389, |
| "learning_rate": 4.281698938163648e-05, |
| "loss": 0.4364, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.6897133220910624, |
| "grad_norm": 0.31810632269177597, |
| "learning_rate": 4.2785758900687075e-05, |
| "loss": 0.4389, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.6913996627318718, |
| "grad_norm": 0.38692488885344895, |
| "learning_rate": 4.2754528419737665e-05, |
| "loss": 0.4563, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6930860033726813, |
| "grad_norm": 0.3583081987194819, |
| "learning_rate": 4.272329793878826e-05, |
| "loss": 0.4497, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.6947723440134908, |
| "grad_norm": 0.39986326119698185, |
| "learning_rate": 4.269206745783885e-05, |
| "loss": 0.4478, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6964586846543002, |
| "grad_norm": 0.36531474130869634, |
| "learning_rate": 4.266083697688944e-05, |
| "loss": 0.4441, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.6981450252951096, |
| "grad_norm": 0.36687118909418154, |
| "learning_rate": 4.262960649594004e-05, |
| "loss": 0.4318, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.6998313659359191, |
| "grad_norm": 0.4182108090774202, |
| "learning_rate": 4.259837601499063e-05, |
| "loss": 0.4635, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.7015177065767285, |
| "grad_norm": 0.3153963333187026, |
| "learning_rate": 4.2567145534041226e-05, |
| "loss": 0.438, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.7032040472175379, |
| "grad_norm": 0.44907938165023165, |
| "learning_rate": 4.2535915053091817e-05, |
| "loss": 0.4354, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.7048903878583473, |
| "grad_norm": 0.32544898293977376, |
| "learning_rate": 4.2504684572142414e-05, |
| "loss": 0.4595, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.7065767284991569, |
| "grad_norm": 0.3887718081828085, |
| "learning_rate": 4.2473454091193004e-05, |
| "loss": 0.4324, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.7082630691399663, |
| "grad_norm": 0.3801064013683576, |
| "learning_rate": 4.24422236102436e-05, |
| "loss": 0.4547, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7099494097807757, |
| "grad_norm": 0.36710077046278583, |
| "learning_rate": 4.241099312929419e-05, |
| "loss": 0.43, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.7116357504215851, |
| "grad_norm": 0.3030182320062759, |
| "learning_rate": 4.237976264834478e-05, |
| "loss": 0.4428, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.7133220910623946, |
| "grad_norm": 0.3905227942500751, |
| "learning_rate": 4.234853216739538e-05, |
| "loss": 0.4433, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.715008431703204, |
| "grad_norm": 0.3360133794872185, |
| "learning_rate": 4.2317301686445975e-05, |
| "loss": 0.4406, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.7166947723440135, |
| "grad_norm": 0.4107242343136471, |
| "learning_rate": 4.228607120549657e-05, |
| "loss": 0.4355, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.718381112984823, |
| "grad_norm": 0.35341266657551246, |
| "learning_rate": 4.225484072454716e-05, |
| "loss": 0.4407, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.7200674536256324, |
| "grad_norm": 0.32748306193146526, |
| "learning_rate": 4.222361024359775e-05, |
| "loss": 0.4526, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.7217537942664418, |
| "grad_norm": 0.3476633561960724, |
| "learning_rate": 4.219237976264835e-05, |
| "loss": 0.4326, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.7234401349072512, |
| "grad_norm": 0.39968844352495325, |
| "learning_rate": 4.216114928169894e-05, |
| "loss": 0.4386, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.7251264755480608, |
| "grad_norm": 0.3422492650637317, |
| "learning_rate": 4.2129918800749536e-05, |
| "loss": 0.4408, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7268128161888702, |
| "grad_norm": 0.38617427295803425, |
| "learning_rate": 4.2098688319800126e-05, |
| "loss": 0.4405, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.7284991568296796, |
| "grad_norm": 0.382691434468715, |
| "learning_rate": 4.206745783885072e-05, |
| "loss": 0.4352, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.730185497470489, |
| "grad_norm": 0.34931977303922557, |
| "learning_rate": 4.203622735790131e-05, |
| "loss": 0.4515, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.7318718381112985, |
| "grad_norm": 0.36781584253563737, |
| "learning_rate": 4.200499687695191e-05, |
| "loss": 0.4678, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.7335581787521079, |
| "grad_norm": 0.3439821047053887, |
| "learning_rate": 4.19737663960025e-05, |
| "loss": 0.4346, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.7352445193929174, |
| "grad_norm": 0.33784258943847506, |
| "learning_rate": 4.194253591505309e-05, |
| "loss": 0.4571, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.7369308600337268, |
| "grad_norm": 0.3458788419301591, |
| "learning_rate": 4.191130543410369e-05, |
| "loss": 0.4317, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.7386172006745363, |
| "grad_norm": 0.3511715747554807, |
| "learning_rate": 4.188007495315428e-05, |
| "loss": 0.4355, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.7403035413153457, |
| "grad_norm": 0.4087128357184536, |
| "learning_rate": 4.1848844472204875e-05, |
| "loss": 0.446, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.7419898819561551, |
| "grad_norm": 0.29749880486910146, |
| "learning_rate": 4.1817613991255465e-05, |
| "loss": 0.4369, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7436762225969646, |
| "grad_norm": 0.4058139525114164, |
| "learning_rate": 4.178638351030606e-05, |
| "loss": 0.4312, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.7453625632377741, |
| "grad_norm": 0.4113540180328209, |
| "learning_rate": 4.175515302935665e-05, |
| "loss": 0.4565, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.7470489038785835, |
| "grad_norm": 0.2859777308910414, |
| "learning_rate": 4.172392254840725e-05, |
| "loss": 0.4476, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.7487352445193929, |
| "grad_norm": 0.4494426475589675, |
| "learning_rate": 4.169269206745784e-05, |
| "loss": 0.4293, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.7504215851602024, |
| "grad_norm": 0.3191112415766653, |
| "learning_rate": 4.166146158650843e-05, |
| "loss": 0.4543, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.7521079258010118, |
| "grad_norm": 0.4899358892440741, |
| "learning_rate": 4.1630231105559026e-05, |
| "loss": 0.4316, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.7537942664418212, |
| "grad_norm": 0.4045866323132377, |
| "learning_rate": 4.1599000624609616e-05, |
| "loss": 0.4572, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.7554806070826307, |
| "grad_norm": 0.40606674939278087, |
| "learning_rate": 4.156777014366021e-05, |
| "loss": 0.4305, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.7571669477234402, |
| "grad_norm": 0.41131034817801554, |
| "learning_rate": 4.15365396627108e-05, |
| "loss": 0.448, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.7588532883642496, |
| "grad_norm": 0.36374019960621246, |
| "learning_rate": 4.150530918176141e-05, |
| "loss": 0.4471, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.760539629005059, |
| "grad_norm": 0.3407701272183516, |
| "learning_rate": 4.1474078700812e-05, |
| "loss": 0.4416, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.7622259696458684, |
| "grad_norm": 0.33033061668230757, |
| "learning_rate": 4.144284821986259e-05, |
| "loss": 0.4383, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.7639123102866779, |
| "grad_norm": 0.32073839968069323, |
| "learning_rate": 4.1411617738913184e-05, |
| "loss": 0.4311, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.7655986509274874, |
| "grad_norm": 0.40654395677206645, |
| "learning_rate": 4.1380387257963775e-05, |
| "loss": 0.4507, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.7672849915682968, |
| "grad_norm": 0.3331641705190293, |
| "learning_rate": 4.134915677701437e-05, |
| "loss": 0.4588, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.7689713322091062, |
| "grad_norm": 0.36035655188344745, |
| "learning_rate": 4.131792629606496e-05, |
| "loss": 0.4316, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.7706576728499157, |
| "grad_norm": 0.3529072751955698, |
| "learning_rate": 4.128669581511556e-05, |
| "loss": 0.4505, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.7723440134907251, |
| "grad_norm": 0.3717386077872241, |
| "learning_rate": 4.125546533416615e-05, |
| "loss": 0.4252, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.7740303541315345, |
| "grad_norm": 0.35367881506426163, |
| "learning_rate": 4.122423485321674e-05, |
| "loss": 0.4401, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.7757166947723441, |
| "grad_norm": 0.4049795855490162, |
| "learning_rate": 4.1193004372267336e-05, |
| "loss": 0.4431, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7774030354131535, |
| "grad_norm": 0.3916967673517559, |
| "learning_rate": 4.1161773891317926e-05, |
| "loss": 0.4367, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.7790893760539629, |
| "grad_norm": 0.46051501058246047, |
| "learning_rate": 4.113054341036852e-05, |
| "loss": 0.4305, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.7807757166947723, |
| "grad_norm": 0.3470589434545159, |
| "learning_rate": 4.109931292941911e-05, |
| "loss": 0.4402, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.7824620573355818, |
| "grad_norm": 0.4352877775027757, |
| "learning_rate": 4.106808244846971e-05, |
| "loss": 0.4353, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.7841483979763912, |
| "grad_norm": 0.3193187418301974, |
| "learning_rate": 4.10368519675203e-05, |
| "loss": 0.4417, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.7858347386172007, |
| "grad_norm": 0.3143650168188273, |
| "learning_rate": 4.10056214865709e-05, |
| "loss": 0.4571, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.7875210792580101, |
| "grad_norm": 0.3219340880551001, |
| "learning_rate": 4.097439100562149e-05, |
| "loss": 0.447, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.7892074198988196, |
| "grad_norm": 0.3012606759811735, |
| "learning_rate": 4.094316052467208e-05, |
| "loss": 0.4066, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.790893760539629, |
| "grad_norm": 0.3483395029680997, |
| "learning_rate": 4.0911930043722674e-05, |
| "loss": 0.4227, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.7925801011804384, |
| "grad_norm": 0.35722805142424147, |
| "learning_rate": 4.0880699562773265e-05, |
| "loss": 0.4163, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7942664418212478, |
| "grad_norm": 0.38762688601542045, |
| "learning_rate": 4.084946908182386e-05, |
| "loss": 0.4428, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.7959527824620574, |
| "grad_norm": 0.44221716942779493, |
| "learning_rate": 4.081823860087445e-05, |
| "loss": 0.4475, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.7976391231028668, |
| "grad_norm": 0.42521687012311943, |
| "learning_rate": 4.078700811992505e-05, |
| "loss": 0.4435, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.7993254637436762, |
| "grad_norm": 0.4614612479724292, |
| "learning_rate": 4.075577763897564e-05, |
| "loss": 0.4342, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.8010118043844857, |
| "grad_norm": 0.3880206058427269, |
| "learning_rate": 4.0724547158026236e-05, |
| "loss": 0.4525, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.8026981450252951, |
| "grad_norm": 0.38043035899138344, |
| "learning_rate": 4.069331667707683e-05, |
| "loss": 0.4402, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.8043844856661045, |
| "grad_norm": 0.41207333481490105, |
| "learning_rate": 4.066208619612742e-05, |
| "loss": 0.4696, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.806070826306914, |
| "grad_norm": 0.4060511255941783, |
| "learning_rate": 4.063085571517802e-05, |
| "loss": 0.4323, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.8077571669477235, |
| "grad_norm": 0.40363614657308583, |
| "learning_rate": 4.059962523422861e-05, |
| "loss": 0.4557, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.8094435075885329, |
| "grad_norm": 0.4188029298749793, |
| "learning_rate": 4.056839475327921e-05, |
| "loss": 0.4169, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8111298482293423, |
| "grad_norm": 0.3254549567851391, |
| "learning_rate": 4.05371642723298e-05, |
| "loss": 0.4193, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.8128161888701517, |
| "grad_norm": 0.4962719043903534, |
| "learning_rate": 4.050593379138039e-05, |
| "loss": 0.4464, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.8145025295109612, |
| "grad_norm": 0.3295931350843861, |
| "learning_rate": 4.0474703310430984e-05, |
| "loss": 0.43, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.8161888701517707, |
| "grad_norm": 0.47883553020245095, |
| "learning_rate": 4.0443472829481574e-05, |
| "loss": 0.4522, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.8178752107925801, |
| "grad_norm": 0.33906309279359814, |
| "learning_rate": 4.041224234853217e-05, |
| "loss": 0.4482, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.8195615514333895, |
| "grad_norm": 0.40081929926357074, |
| "learning_rate": 4.038101186758276e-05, |
| "loss": 0.4477, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.821247892074199, |
| "grad_norm": 0.33573980380519236, |
| "learning_rate": 4.034978138663336e-05, |
| "loss": 0.4308, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.8229342327150084, |
| "grad_norm": 0.38799000304628345, |
| "learning_rate": 4.031855090568395e-05, |
| "loss": 0.4383, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.8246205733558178, |
| "grad_norm": 0.3096129062326779, |
| "learning_rate": 4.0287320424734545e-05, |
| "loss": 0.4463, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.8263069139966274, |
| "grad_norm": 0.38212217849587243, |
| "learning_rate": 4.0256089943785135e-05, |
| "loss": 0.4401, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8279932546374368, |
| "grad_norm": 0.3248274009219074, |
| "learning_rate": 4.0224859462835726e-05, |
| "loss": 0.4352, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.8296795952782462, |
| "grad_norm": 0.3643266483390527, |
| "learning_rate": 4.019362898188632e-05, |
| "loss": 0.4729, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.8313659359190556, |
| "grad_norm": 0.3487030333559748, |
| "learning_rate": 4.016239850093691e-05, |
| "loss": 0.4266, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.8330522765598651, |
| "grad_norm": 0.35571596803070454, |
| "learning_rate": 4.013116801998751e-05, |
| "loss": 0.4438, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.8347386172006745, |
| "grad_norm": 0.41833496260050856, |
| "learning_rate": 4.00999375390381e-05, |
| "loss": 0.4503, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.836424957841484, |
| "grad_norm": 0.3986814710468665, |
| "learning_rate": 4.00687070580887e-05, |
| "loss": 0.4315, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.8381112984822934, |
| "grad_norm": 0.3290599779478399, |
| "learning_rate": 4.003747657713929e-05, |
| "loss": 0.4333, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.8397976391231029, |
| "grad_norm": 0.38239443500458137, |
| "learning_rate": 4.0006246096189884e-05, |
| "loss": 0.448, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.8414839797639123, |
| "grad_norm": 0.321771585575904, |
| "learning_rate": 3.9975015615240474e-05, |
| "loss": 0.4208, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.8431703204047217, |
| "grad_norm": 0.40619718423970064, |
| "learning_rate": 3.9943785134291064e-05, |
| "loss": 0.4494, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8448566610455311, |
| "grad_norm": 0.38935911453692046, |
| "learning_rate": 3.991255465334167e-05, |
| "loss": 0.4354, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.8465430016863407, |
| "grad_norm": 0.34815254820703556, |
| "learning_rate": 3.988132417239226e-05, |
| "loss": 0.4414, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.8482293423271501, |
| "grad_norm": 0.4065925055362203, |
| "learning_rate": 3.9850093691442855e-05, |
| "loss": 0.4396, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.8499156829679595, |
| "grad_norm": 0.32855881491070554, |
| "learning_rate": 3.9818863210493445e-05, |
| "loss": 0.4371, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.851602023608769, |
| "grad_norm": 0.41451234973837914, |
| "learning_rate": 3.978763272954404e-05, |
| "loss": 0.4501, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.8532883642495784, |
| "grad_norm": 0.3464873489983337, |
| "learning_rate": 3.975640224859463e-05, |
| "loss": 0.4379, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.8549747048903878, |
| "grad_norm": 0.34786674244235233, |
| "learning_rate": 3.972517176764522e-05, |
| "loss": 0.429, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.8566610455311973, |
| "grad_norm": 0.3451218776597317, |
| "learning_rate": 3.969394128669582e-05, |
| "loss": 0.4462, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.8583473861720068, |
| "grad_norm": 0.3575771672004591, |
| "learning_rate": 3.966271080574641e-05, |
| "loss": 0.4266, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.8600337268128162, |
| "grad_norm": 0.2989153179892053, |
| "learning_rate": 3.9631480324797006e-05, |
| "loss": 0.4288, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.8617200674536256, |
| "grad_norm": 0.3436388897224447, |
| "learning_rate": 3.96002498438476e-05, |
| "loss": 0.4095, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.863406408094435, |
| "grad_norm": 0.35763903249570084, |
| "learning_rate": 3.9569019362898194e-05, |
| "loss": 0.4562, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.8650927487352446, |
| "grad_norm": 0.41390070859437555, |
| "learning_rate": 3.9537788881948784e-05, |
| "loss": 0.4455, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.866779089376054, |
| "grad_norm": 0.34010398726999513, |
| "learning_rate": 3.9506558400999374e-05, |
| "loss": 0.4577, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.8684654300168634, |
| "grad_norm": 0.37866647645676466, |
| "learning_rate": 3.947532792004997e-05, |
| "loss": 0.4386, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.8701517706576728, |
| "grad_norm": 0.30028155172407867, |
| "learning_rate": 3.944409743910056e-05, |
| "loss": 0.434, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.8718381112984823, |
| "grad_norm": 0.3669560450150648, |
| "learning_rate": 3.941286695815116e-05, |
| "loss": 0.4438, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.8735244519392917, |
| "grad_norm": 0.3457542078983038, |
| "learning_rate": 3.938163647720175e-05, |
| "loss": 0.453, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.8752107925801011, |
| "grad_norm": 0.33965599759527376, |
| "learning_rate": 3.9350405996252345e-05, |
| "loss": 0.4178, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.8768971332209107, |
| "grad_norm": 0.3721436758139496, |
| "learning_rate": 3.9319175515302935e-05, |
| "loss": 0.4468, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.8785834738617201, |
| "grad_norm": 0.4061525669717563, |
| "learning_rate": 3.928794503435353e-05, |
| "loss": 0.4259, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.8802698145025295, |
| "grad_norm": 0.30659043214103315, |
| "learning_rate": 3.925671455340412e-05, |
| "loss": 0.4272, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.8819561551433389, |
| "grad_norm": 0.3409351347239614, |
| "learning_rate": 3.922548407245471e-05, |
| "loss": 0.4421, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.8836424957841484, |
| "grad_norm": 0.35220982612512924, |
| "learning_rate": 3.919425359150531e-05, |
| "loss": 0.4412, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.8853288364249579, |
| "grad_norm": 0.30552652380518674, |
| "learning_rate": 3.91630231105559e-05, |
| "loss": 0.4214, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.8870151770657673, |
| "grad_norm": 0.4137227791099141, |
| "learning_rate": 3.9131792629606496e-05, |
| "loss": 0.4269, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.8887015177065767, |
| "grad_norm": 0.3150228249974658, |
| "learning_rate": 3.9100562148657093e-05, |
| "loss": 0.455, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.8903878583473862, |
| "grad_norm": 0.3686613874530323, |
| "learning_rate": 3.906933166770769e-05, |
| "loss": 0.4476, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.8920741989881956, |
| "grad_norm": 0.3430472314617903, |
| "learning_rate": 3.903810118675828e-05, |
| "loss": 0.4482, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.893760539629005, |
| "grad_norm": 0.35676005653730164, |
| "learning_rate": 3.900687070580887e-05, |
| "loss": 0.4377, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.8954468802698144, |
| "grad_norm": 0.3208027933402526, |
| "learning_rate": 3.897564022485947e-05, |
| "loss": 0.4211, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.897133220910624, |
| "grad_norm": 0.3672019529503482, |
| "learning_rate": 3.894440974391006e-05, |
| "loss": 0.4376, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.8988195615514334, |
| "grad_norm": 0.3253386882071814, |
| "learning_rate": 3.8913179262960655e-05, |
| "loss": 0.4445, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.9005059021922428, |
| "grad_norm": 0.39486961722325015, |
| "learning_rate": 3.8881948782011245e-05, |
| "loss": 0.4481, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.9021922428330523, |
| "grad_norm": 0.40525246697045486, |
| "learning_rate": 3.885071830106184e-05, |
| "loss": 0.4437, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.9038785834738617, |
| "grad_norm": 0.3448446619749969, |
| "learning_rate": 3.881948782011243e-05, |
| "loss": 0.44, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.9055649241146712, |
| "grad_norm": 0.3823092376610685, |
| "learning_rate": 3.878825733916302e-05, |
| "loss": 0.4289, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.9072512647554806, |
| "grad_norm": 0.35549627695079783, |
| "learning_rate": 3.875702685821362e-05, |
| "loss": 0.4365, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.9089376053962901, |
| "grad_norm": 0.361600452020028, |
| "learning_rate": 3.872579637726421e-05, |
| "loss": 0.4333, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.9106239460370995, |
| "grad_norm": 0.39238177458414175, |
| "learning_rate": 3.8694565896314806e-05, |
| "loss": 0.4361, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.9123102866779089, |
| "grad_norm": 0.30858347531378094, |
| "learning_rate": 3.8663335415365396e-05, |
| "loss": 0.4439, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.9139966273187183, |
| "grad_norm": 0.40433613406568636, |
| "learning_rate": 3.863210493441599e-05, |
| "loss": 0.4343, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.9156829679595279, |
| "grad_norm": 0.32314244386563457, |
| "learning_rate": 3.8600874453466583e-05, |
| "loss": 0.4402, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.9173693086003373, |
| "grad_norm": 0.35329137291725576, |
| "learning_rate": 3.856964397251718e-05, |
| "loss": 0.4291, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.9190556492411467, |
| "grad_norm": 0.4094160268434705, |
| "learning_rate": 3.853841349156777e-05, |
| "loss": 0.4553, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.9207419898819561, |
| "grad_norm": 0.35244877974951694, |
| "learning_rate": 3.850718301061836e-05, |
| "loss": 0.4378, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.9224283305227656, |
| "grad_norm": 0.34568213081212973, |
| "learning_rate": 3.847595252966896e-05, |
| "loss": 0.4117, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.924114671163575, |
| "grad_norm": 0.3709568818749086, |
| "learning_rate": 3.844472204871955e-05, |
| "loss": 0.4306, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.9258010118043845, |
| "grad_norm": 0.3775761038435258, |
| "learning_rate": 3.8413491567770145e-05, |
| "loss": 0.441, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.927487352445194, |
| "grad_norm": 0.3698983531500424, |
| "learning_rate": 3.8382261086820735e-05, |
| "loss": 0.4314, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9291736930860034, |
| "grad_norm": 0.4074391201937174, |
| "learning_rate": 3.835103060587133e-05, |
| "loss": 0.4317, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.9308600337268128, |
| "grad_norm": 0.426029092659015, |
| "learning_rate": 3.831980012492192e-05, |
| "loss": 0.4368, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.9325463743676222, |
| "grad_norm": 0.34454783098281766, |
| "learning_rate": 3.828856964397252e-05, |
| "loss": 0.4458, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.9342327150084317, |
| "grad_norm": 0.3817185847692516, |
| "learning_rate": 3.8257339163023116e-05, |
| "loss": 0.4576, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.9359190556492412, |
| "grad_norm": 0.3140371120993948, |
| "learning_rate": 3.8226108682073706e-05, |
| "loss": 0.4267, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.9376053962900506, |
| "grad_norm": 0.37134960441734055, |
| "learning_rate": 3.81948782011243e-05, |
| "loss": 0.4371, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.93929173693086, |
| "grad_norm": 0.3261734677460667, |
| "learning_rate": 3.816364772017489e-05, |
| "loss": 0.4312, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.9409780775716695, |
| "grad_norm": 0.3517792771913715, |
| "learning_rate": 3.813241723922549e-05, |
| "loss": 0.4286, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.9426644182124789, |
| "grad_norm": 0.32412252967719923, |
| "learning_rate": 3.810118675827608e-05, |
| "loss": 0.4381, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.9443507588532883, |
| "grad_norm": 0.3200973688390884, |
| "learning_rate": 3.806995627732668e-05, |
| "loss": 0.4334, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.9460370994940978, |
| "grad_norm": 0.32686280606429596, |
| "learning_rate": 3.803872579637727e-05, |
| "loss": 0.4329, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.9477234401349073, |
| "grad_norm": 0.3243359164811927, |
| "learning_rate": 3.800749531542786e-05, |
| "loss": 0.4338, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.9494097807757167, |
| "grad_norm": 0.3487166516003897, |
| "learning_rate": 3.7976264834478454e-05, |
| "loss": 0.4319, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.9510961214165261, |
| "grad_norm": 0.3272475408871655, |
| "learning_rate": 3.7945034353529045e-05, |
| "loss": 0.435, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.9527824620573356, |
| "grad_norm": 0.3669600629409637, |
| "learning_rate": 3.791380387257964e-05, |
| "loss": 0.4466, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.954468802698145, |
| "grad_norm": 0.2963475755698808, |
| "learning_rate": 3.788257339163023e-05, |
| "loss": 0.4324, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.9561551433389545, |
| "grad_norm": 0.2997001225929376, |
| "learning_rate": 3.785134291068083e-05, |
| "loss": 0.4227, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.9578414839797639, |
| "grad_norm": 0.4024928697863006, |
| "learning_rate": 3.782011242973142e-05, |
| "loss": 0.4229, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.9595278246205734, |
| "grad_norm": 0.32574334101219726, |
| "learning_rate": 3.778888194878201e-05, |
| "loss": 0.4333, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.9612141652613828, |
| "grad_norm": 0.4233326029987618, |
| "learning_rate": 3.7757651467832606e-05, |
| "loss": 0.4258, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.9629005059021922, |
| "grad_norm": 0.31441995646070814, |
| "learning_rate": 3.7726420986883196e-05, |
| "loss": 0.4366, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.9645868465430016, |
| "grad_norm": 0.3978301275223556, |
| "learning_rate": 3.769519050593379e-05, |
| "loss": 0.4386, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.9662731871838112, |
| "grad_norm": 0.38255265169765723, |
| "learning_rate": 3.766396002498438e-05, |
| "loss": 0.4465, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.9679595278246206, |
| "grad_norm": 0.41128548742947124, |
| "learning_rate": 3.763272954403498e-05, |
| "loss": 0.4217, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.96964586846543, |
| "grad_norm": 0.3718617149053186, |
| "learning_rate": 3.760149906308557e-05, |
| "loss": 0.4252, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.9713322091062394, |
| "grad_norm": 0.3887832792374972, |
| "learning_rate": 3.757026858213617e-05, |
| "loss": 0.4529, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.9730185497470489, |
| "grad_norm": 0.4360800602788443, |
| "learning_rate": 3.753903810118676e-05, |
| "loss": 0.4319, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.9747048903878583, |
| "grad_norm": 0.33415211151494867, |
| "learning_rate": 3.750780762023735e-05, |
| "loss": 0.4246, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.9763912310286678, |
| "grad_norm": 0.3189675808202832, |
| "learning_rate": 3.747657713928795e-05, |
| "loss": 0.4159, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.9780775716694773, |
| "grad_norm": 0.44576016512674754, |
| "learning_rate": 3.744534665833854e-05, |
| "loss": 0.4537, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.9797639123102867, |
| "grad_norm": 0.36235218251396417, |
| "learning_rate": 3.741411617738914e-05, |
| "loss": 0.4511, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.9814502529510961, |
| "grad_norm": 0.40599216443718106, |
| "learning_rate": 3.738288569643973e-05, |
| "loss": 0.4388, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.9831365935919055, |
| "grad_norm": 0.393707746186096, |
| "learning_rate": 3.7351655215490325e-05, |
| "loss": 0.437, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.984822934232715, |
| "grad_norm": 0.35873213762005124, |
| "learning_rate": 3.7320424734540916e-05, |
| "loss": 0.4317, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.9865092748735245, |
| "grad_norm": 0.3735478259156147, |
| "learning_rate": 3.7289194253591506e-05, |
| "loss": 0.435, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.9881956155143339, |
| "grad_norm": 0.3145636810256735, |
| "learning_rate": 3.72579637726421e-05, |
| "loss": 0.4301, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.9898819561551433, |
| "grad_norm": 0.29373293301899733, |
| "learning_rate": 3.722673329169269e-05, |
| "loss": 0.4088, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.9915682967959528, |
| "grad_norm": 0.39253716389128174, |
| "learning_rate": 3.719550281074329e-05, |
| "loss": 0.4104, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.9932546374367622, |
| "grad_norm": 0.28317911466413853, |
| "learning_rate": 3.716427232979388e-05, |
| "loss": 0.4204, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.9949409780775716, |
| "grad_norm": 0.31230546154127087, |
| "learning_rate": 3.713304184884448e-05, |
| "loss": 0.4217, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.9966273187183811, |
| "grad_norm": 0.3334436143958086, |
| "learning_rate": 3.710181136789507e-05, |
| "loss": 0.4302, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.9983136593591906, |
| "grad_norm": 0.2968157232197624, |
| "learning_rate": 3.7070580886945664e-05, |
| "loss": 0.4253, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.3146042029883502, |
| "learning_rate": 3.7039350405996254e-05, |
| "loss": 0.397, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.0016863406408094, |
| "grad_norm": 0.3370073663531201, |
| "learning_rate": 3.7008119925046844e-05, |
| "loss": 0.375, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.0033726812816188, |
| "grad_norm": 0.32116215055132985, |
| "learning_rate": 3.697688944409744e-05, |
| "loss": 0.3708, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.0050590219224282, |
| "grad_norm": 0.29860857891458864, |
| "learning_rate": 3.694565896314803e-05, |
| "loss": 0.3865, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.0067453625632379, |
| "grad_norm": 0.3275096400221224, |
| "learning_rate": 3.691442848219863e-05, |
| "loss": 0.3718, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.0084317032040473, |
| "grad_norm": 0.31486317653134394, |
| "learning_rate": 3.688319800124922e-05, |
| "loss": 0.3706, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.0101180438448567, |
| "grad_norm": 0.39271594795296993, |
| "learning_rate": 3.6851967520299815e-05, |
| "loss": 0.3522, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.0118043844856661, |
| "grad_norm": 0.26586173702738847, |
| "learning_rate": 3.6820737039350406e-05, |
| "loss": 0.3659, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.0134907251264755, |
| "grad_norm": 0.3550935166184591, |
| "learning_rate": 3.6789506558400996e-05, |
| "loss": 0.372, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.015177065767285, |
| "grad_norm": 0.3757468734375804, |
| "learning_rate": 3.675827607745159e-05, |
| "loss": 0.3755, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.0168634064080944, |
| "grad_norm": 0.32345317238449983, |
| "learning_rate": 3.672704559650218e-05, |
| "loss": 0.3712, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.0185497470489038, |
| "grad_norm": 0.389457960719761, |
| "learning_rate": 3.669581511555278e-05, |
| "loss": 0.38, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.0202360876897134, |
| "grad_norm": 0.31136627461944766, |
| "learning_rate": 3.666458463460338e-05, |
| "loss": 0.3754, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.0219224283305228, |
| "grad_norm": 0.3728830518915666, |
| "learning_rate": 3.6633354153653974e-05, |
| "loss": 0.3768, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.0236087689713322, |
| "grad_norm": 0.325914751533291, |
| "learning_rate": 3.6602123672704564e-05, |
| "loss": 0.361, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.0252951096121417, |
| "grad_norm": 0.32251147450644635, |
| "learning_rate": 3.6570893191755154e-05, |
| "loss": 0.3632, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.026981450252951, |
| "grad_norm": 0.38601247039180847, |
| "learning_rate": 3.653966271080575e-05, |
| "loss": 0.3374, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.0286677908937605, |
| "grad_norm": 0.3113652782897978, |
| "learning_rate": 3.650843222985634e-05, |
| "loss": 0.3492, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.03035413153457, |
| "grad_norm": 0.40738258586752285, |
| "learning_rate": 3.647720174890694e-05, |
| "loss": 0.3826, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.0320404721753795, |
| "grad_norm": 0.40482778144228854, |
| "learning_rate": 3.644597126795753e-05, |
| "loss": 0.3618, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.033726812816189, |
| "grad_norm": 0.3100389171136031, |
| "learning_rate": 3.6414740787008125e-05, |
| "loss": 0.3617, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.0354131534569984, |
| "grad_norm": 0.4345026237676389, |
| "learning_rate": 3.6383510306058715e-05, |
| "loss": 0.3818, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.0370994940978078, |
| "grad_norm": 0.3302182083765599, |
| "learning_rate": 3.635227982510931e-05, |
| "loss": 0.3802, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.0387858347386172, |
| "grad_norm": 0.3478874768962978, |
| "learning_rate": 3.63210493441599e-05, |
| "loss": 0.3722, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.0404721753794266, |
| "grad_norm": 0.3818087599342268, |
| "learning_rate": 3.628981886321049e-05, |
| "loss": 0.3785, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.042158516020236, |
| "grad_norm": 0.3103310310694465, |
| "learning_rate": 3.625858838226109e-05, |
| "loss": 0.3554, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.0438448566610454, |
| "grad_norm": 0.3195864556944807, |
| "learning_rate": 3.622735790131168e-05, |
| "loss": 0.3624, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.045531197301855, |
| "grad_norm": 0.3715118708841681, |
| "learning_rate": 3.6196127420362277e-05, |
| "loss": 0.3638, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.0472175379426645, |
| "grad_norm": 0.2947796032379757, |
| "learning_rate": 3.616489693941287e-05, |
| "loss": 0.3772, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.048903878583474, |
| "grad_norm": 0.43267966177276695, |
| "learning_rate": 3.6133666458463464e-05, |
| "loss": 0.3674, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.0505902192242833, |
| "grad_norm": 0.27812298939709335, |
| "learning_rate": 3.6102435977514054e-05, |
| "loss": 0.3465, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.0522765598650927, |
| "grad_norm": 0.4438329515641008, |
| "learning_rate": 3.6071205496564644e-05, |
| "loss": 0.3667, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.0539629005059021, |
| "grad_norm": 0.29768610317679384, |
| "learning_rate": 3.603997501561524e-05, |
| "loss": 0.3739, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.0556492411467115, |
| "grad_norm": 0.3100212861752048, |
| "learning_rate": 3.600874453466583e-05, |
| "loss": 0.358, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.0573355817875212, |
| "grad_norm": 0.3445036926061418, |
| "learning_rate": 3.597751405371643e-05, |
| "loss": 0.3684, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.0590219224283306, |
| "grad_norm": 0.2718866041449956, |
| "learning_rate": 3.594628357276702e-05, |
| "loss": 0.3493, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.06070826306914, |
| "grad_norm": 0.3392352864903805, |
| "learning_rate": 3.5915053091817615e-05, |
| "loss": 0.3745, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.0623946037099494, |
| "grad_norm": 0.3328919083232664, |
| "learning_rate": 3.588382261086821e-05, |
| "loss": 0.3759, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.0640809443507588, |
| "grad_norm": 0.30982338431159007, |
| "learning_rate": 3.58525921299188e-05, |
| "loss": 0.3736, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.0657672849915683, |
| "grad_norm": 0.351636253410426, |
| "learning_rate": 3.58213616489694e-05, |
| "loss": 0.3779, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.0674536256323777, |
| "grad_norm": 0.3019957709061264, |
| "learning_rate": 3.579013116801999e-05, |
| "loss": 0.386, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.069139966273187, |
| "grad_norm": 0.35166269106240106, |
| "learning_rate": 3.5758900687070586e-05, |
| "loss": 0.3698, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.0708263069139967, |
| "grad_norm": 0.2893719435618796, |
| "learning_rate": 3.5727670206121176e-05, |
| "loss": 0.3605, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.0725126475548061, |
| "grad_norm": 2.1683991651466523, |
| "learning_rate": 3.569643972517177e-05, |
| "loss": 0.4151, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.0741989881956155, |
| "grad_norm": 0.34939031231545353, |
| "learning_rate": 3.5665209244222363e-05, |
| "loss": 0.357, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.075885328836425, |
| "grad_norm": 0.33365230698899556, |
| "learning_rate": 3.563397876327296e-05, |
| "loss": 0.3569, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.0775716694772344, |
| "grad_norm": 0.38333312282223403, |
| "learning_rate": 3.560274828232355e-05, |
| "loss": 0.3725, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.0792580101180438, |
| "grad_norm": 0.2989943238359894, |
| "learning_rate": 3.557151780137414e-05, |
| "loss": 0.3659, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.0809443507588532, |
| "grad_norm": 0.35190364679048197, |
| "learning_rate": 3.554028732042474e-05, |
| "loss": 0.3456, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.0826306913996628, |
| "grad_norm": 0.3102309344220619, |
| "learning_rate": 3.550905683947533e-05, |
| "loss": 0.3513, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.0843170320404723, |
| "grad_norm": 0.34766344126692783, |
| "learning_rate": 3.5477826358525925e-05, |
| "loss": 0.3508, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.0860033726812817, |
| "grad_norm": 0.30303909121585015, |
| "learning_rate": 3.5446595877576515e-05, |
| "loss": 0.3789, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.087689713322091, |
| "grad_norm": 0.3531070193198295, |
| "learning_rate": 3.541536539662711e-05, |
| "loss": 0.3697, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.0893760539629005, |
| "grad_norm": 0.3442402875788552, |
| "learning_rate": 3.53841349156777e-05, |
| "loss": 0.3748, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.09106239460371, |
| "grad_norm": 0.31913063134238107, |
| "learning_rate": 3.53529044347283e-05, |
| "loss": 0.3527, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.0927487352445193, |
| "grad_norm": 0.3150943101074222, |
| "learning_rate": 3.532167395377889e-05, |
| "loss": 0.3736, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.0944350758853287, |
| "grad_norm": 0.3292315737364209, |
| "learning_rate": 3.529044347282948e-05, |
| "loss": 0.3775, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.0961214165261384, |
| "grad_norm": 0.29052932382842833, |
| "learning_rate": 3.5259212991880076e-05, |
| "loss": 0.3731, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.0978077571669478, |
| "grad_norm": 0.375090842212462, |
| "learning_rate": 3.5227982510930666e-05, |
| "loss": 0.356, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.0994940978077572, |
| "grad_norm": 0.32161697933374434, |
| "learning_rate": 3.519675202998126e-05, |
| "loss": 0.3548, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.1011804384485666, |
| "grad_norm": 0.2879157921021396, |
| "learning_rate": 3.5165521549031853e-05, |
| "loss": 0.358, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.102866779089376, |
| "grad_norm": 0.41727435113409483, |
| "learning_rate": 3.513429106808245e-05, |
| "loss": 0.381, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.1045531197301854, |
| "grad_norm": 0.2837299613816854, |
| "learning_rate": 3.510306058713304e-05, |
| "loss": 0.3533, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.1062394603709949, |
| "grad_norm": 0.33391080950397317, |
| "learning_rate": 3.507183010618364e-05, |
| "loss": 0.3561, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.1079258010118043, |
| "grad_norm": 0.29547239404552594, |
| "learning_rate": 3.5040599625234234e-05, |
| "loss": 0.3644, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.109612141652614, |
| "grad_norm": 0.32594239439972955, |
| "learning_rate": 3.5009369144284825e-05, |
| "loss": 0.3673, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.1112984822934233, |
| "grad_norm": 0.32452664682226484, |
| "learning_rate": 3.497813866333542e-05, |
| "loss": 0.3678, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.1129848229342327, |
| "grad_norm": 0.31349408760230113, |
| "learning_rate": 3.494690818238601e-05, |
| "loss": 0.3551, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.1146711635750421, |
| "grad_norm": 0.48435598730621804, |
| "learning_rate": 3.491567770143661e-05, |
| "loss": 0.3797, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.1163575042158516, |
| "grad_norm": 0.3427185023275982, |
| "learning_rate": 3.48844472204872e-05, |
| "loss": 0.3729, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.118043844856661, |
| "grad_norm": 0.37943453765393537, |
| "learning_rate": 3.485321673953779e-05, |
| "loss": 0.3711, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.1197301854974704, |
| "grad_norm": 0.32502964540546436, |
| "learning_rate": 3.4821986258588386e-05, |
| "loss": 0.389, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.12141652613828, |
| "grad_norm": 0.3264086956433378, |
| "learning_rate": 3.4790755777638976e-05, |
| "loss": 0.3593, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.1231028667790894, |
| "grad_norm": 0.3953500314813085, |
| "learning_rate": 3.475952529668957e-05, |
| "loss": 0.392, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.1247892074198989, |
| "grad_norm": 0.29474500867892905, |
| "learning_rate": 3.472829481574016e-05, |
| "loss": 0.3597, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.1264755480607083, |
| "grad_norm": 0.3488785070722052, |
| "learning_rate": 3.469706433479076e-05, |
| "loss": 0.3749, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.1281618887015177, |
| "grad_norm": 0.3248792393815756, |
| "learning_rate": 3.466583385384135e-05, |
| "loss": 0.3598, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.129848229342327, |
| "grad_norm": 0.3224420541767695, |
| "learning_rate": 3.463460337289195e-05, |
| "loss": 0.3719, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.1315345699831365, |
| "grad_norm": 0.3417491355704424, |
| "learning_rate": 3.460337289194254e-05, |
| "loss": 0.362, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.1332209106239461, |
| "grad_norm": 0.3427747866197647, |
| "learning_rate": 3.457214241099313e-05, |
| "loss": 0.367, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.1349072512647556, |
| "grad_norm": 0.3106830438240518, |
| "learning_rate": 3.4540911930043724e-05, |
| "loss": 0.367, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.136593591905565, |
| "grad_norm": 0.3540348746643401, |
| "learning_rate": 3.4509681449094315e-05, |
| "loss": 0.3555, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.1382799325463744, |
| "grad_norm": 0.34466886965401156, |
| "learning_rate": 3.447845096814491e-05, |
| "loss": 0.3628, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.1399662731871838, |
| "grad_norm": 0.3164738524402864, |
| "learning_rate": 3.44472204871955e-05, |
| "loss": 0.3602, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.1416526138279932, |
| "grad_norm": 0.37994324932360596, |
| "learning_rate": 3.44159900062461e-05, |
| "loss": 0.3625, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.1433389544688026, |
| "grad_norm": 0.356114149548685, |
| "learning_rate": 3.438475952529669e-05, |
| "loss": 0.3738, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.1450252951096123, |
| "grad_norm": 0.3225747215773845, |
| "learning_rate": 3.435352904434728e-05, |
| "loss": 0.3604, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.1467116357504217, |
| "grad_norm": 0.3773129300465698, |
| "learning_rate": 3.4322298563397876e-05, |
| "loss": 0.3788, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.148397976391231, |
| "grad_norm": 0.304117819143786, |
| "learning_rate": 3.4291068082448466e-05, |
| "loss": 0.3732, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.1500843170320405, |
| "grad_norm": 0.3001832144319778, |
| "learning_rate": 3.425983760149907e-05, |
| "loss": 0.3608, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.15177065767285, |
| "grad_norm": 0.2976369748044028, |
| "learning_rate": 3.422860712054966e-05, |
| "loss": 0.38, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.1534569983136593, |
| "grad_norm": 0.2993556584838581, |
| "learning_rate": 3.419737663960026e-05, |
| "loss": 0.3469, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.1551433389544687, |
| "grad_norm": 0.2952476863488473, |
| "learning_rate": 3.416614615865085e-05, |
| "loss": 0.376, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.1568296795952782, |
| "grad_norm": 0.30611597016381326, |
| "learning_rate": 3.413491567770144e-05, |
| "loss": 0.3447, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.1585160202360876, |
| "grad_norm": 0.33324196316017346, |
| "learning_rate": 3.4103685196752034e-05, |
| "loss": 0.3685, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.1602023608768972, |
| "grad_norm": 0.3616915720368816, |
| "learning_rate": 3.4072454715802624e-05, |
| "loss": 0.3688, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.1618887015177066, |
| "grad_norm": 0.3742976223101849, |
| "learning_rate": 3.404122423485322e-05, |
| "loss": 0.3686, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.163575042158516, |
| "grad_norm": 0.30457069197757114, |
| "learning_rate": 3.400999375390381e-05, |
| "loss": 0.3475, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.1652613827993255, |
| "grad_norm": 0.40089797869146526, |
| "learning_rate": 3.397876327295441e-05, |
| "loss": 0.4003, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.1669477234401349, |
| "grad_norm": 0.31965659321223927, |
| "learning_rate": 3.3947532792005e-05, |
| "loss": 0.3769, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.1686340640809443, |
| "grad_norm": 0.3439706518766779, |
| "learning_rate": 3.3916302311055595e-05, |
| "loss": 0.3659, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.1703204047217537, |
| "grad_norm": 0.3429902418282865, |
| "learning_rate": 3.3885071830106186e-05, |
| "loss": 0.3604, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.1720067453625633, |
| "grad_norm": 0.35074735517188066, |
| "learning_rate": 3.3853841349156776e-05, |
| "loss": 0.367, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.1736930860033727, |
| "grad_norm": 0.3223682518850536, |
| "learning_rate": 3.382261086820737e-05, |
| "loss": 0.359, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.1753794266441822, |
| "grad_norm": 0.36290253264152483, |
| "learning_rate": 3.379138038725796e-05, |
| "loss": 0.3579, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.1770657672849916, |
| "grad_norm": 0.40337855296136965, |
| "learning_rate": 3.376014990630856e-05, |
| "loss": 0.3625, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.178752107925801, |
| "grad_norm": 0.28866061205127996, |
| "learning_rate": 3.372891942535915e-05, |
| "loss": 0.376, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.1804384485666104, |
| "grad_norm": 0.4563999065836459, |
| "learning_rate": 3.369768894440975e-05, |
| "loss": 0.3477, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.1821247892074198, |
| "grad_norm": 0.28381683936533614, |
| "learning_rate": 3.366645846346034e-05, |
| "loss": 0.358, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.1838111298482294, |
| "grad_norm": 0.3681545709767134, |
| "learning_rate": 3.3635227982510934e-05, |
| "loss": 0.3488, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.1854974704890389, |
| "grad_norm": 0.3499215938015231, |
| "learning_rate": 3.3603997501561524e-05, |
| "loss": 0.3699, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.1871838111298483, |
| "grad_norm": 0.36319169174953636, |
| "learning_rate": 3.3572767020612114e-05, |
| "loss": 0.3696, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.1888701517706577, |
| "grad_norm": 0.3141413251617719, |
| "learning_rate": 3.354153653966271e-05, |
| "loss": 0.3386, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.190556492411467, |
| "grad_norm": 0.383269478456824, |
| "learning_rate": 3.35103060587133e-05, |
| "loss": 0.3668, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.1922428330522765, |
| "grad_norm": 0.33350904257579106, |
| "learning_rate": 3.34790755777639e-05, |
| "loss": 0.3762, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.193929173693086, |
| "grad_norm": 0.3789829657202058, |
| "learning_rate": 3.3447845096814495e-05, |
| "loss": 0.3689, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.1956155143338956, |
| "grad_norm": 0.31056826795813275, |
| "learning_rate": 3.3416614615865085e-05, |
| "loss": 0.3377, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.197301854974705, |
| "grad_norm": 0.35526213637912024, |
| "learning_rate": 3.338538413491568e-05, |
| "loss": 0.362, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.1989881956155144, |
| "grad_norm": 0.2867614135968695, |
| "learning_rate": 3.335415365396627e-05, |
| "loss": 0.3549, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.2006745362563238, |
| "grad_norm": 0.3149147995866731, |
| "learning_rate": 3.332292317301687e-05, |
| "loss": 0.3625, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.2023608768971332, |
| "grad_norm": 0.32790661583501146, |
| "learning_rate": 3.329169269206746e-05, |
| "loss": 0.354, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.2040472175379426, |
| "grad_norm": 0.29102264869880434, |
| "learning_rate": 3.3260462211118057e-05, |
| "loss": 0.3761, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.205733558178752, |
| "grad_norm": 0.3187511092694005, |
| "learning_rate": 3.322923173016865e-05, |
| "loss": 0.3617, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.2074198988195615, |
| "grad_norm": 0.327560945523737, |
| "learning_rate": 3.3198001249219244e-05, |
| "loss": 0.3585, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.2091062394603709, |
| "grad_norm": 0.344406280985839, |
| "learning_rate": 3.3166770768269834e-05, |
| "loss": 0.363, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.2107925801011805, |
| "grad_norm": 0.2960616516303917, |
| "learning_rate": 3.3135540287320424e-05, |
| "loss": 0.3697, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.21247892074199, |
| "grad_norm": 0.34919128274833694, |
| "learning_rate": 3.310430980637102e-05, |
| "loss": 0.3417, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.2141652613827993, |
| "grad_norm": 0.33289746835413475, |
| "learning_rate": 3.307307932542161e-05, |
| "loss": 0.3824, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.2158516020236088, |
| "grad_norm": 0.3131245593982676, |
| "learning_rate": 3.304184884447221e-05, |
| "loss": 0.3627, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.2175379426644182, |
| "grad_norm": 0.34440988286437363, |
| "learning_rate": 3.30106183635228e-05, |
| "loss": 0.384, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.2192242833052276, |
| "grad_norm": 0.30769578929370067, |
| "learning_rate": 3.2979387882573395e-05, |
| "loss": 0.3662, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.220910623946037, |
| "grad_norm": 0.36799706179743263, |
| "learning_rate": 3.2948157401623985e-05, |
| "loss": 0.3387, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.2225969645868466, |
| "grad_norm": 0.29634378440964937, |
| "learning_rate": 3.291692692067458e-05, |
| "loss": 0.3463, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.224283305227656, |
| "grad_norm": 0.3136700584153526, |
| "learning_rate": 3.288569643972517e-05, |
| "loss": 0.3784, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.2259696458684655, |
| "grad_norm": 0.35752470110645485, |
| "learning_rate": 3.285446595877576e-05, |
| "loss": 0.3788, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.2276559865092749, |
| "grad_norm": 0.4833836006190696, |
| "learning_rate": 3.282323547782636e-05, |
| "loss": 0.3852, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.2293423271500843, |
| "grad_norm": 0.3228921026231067, |
| "learning_rate": 3.279200499687695e-05, |
| "loss": 0.35, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.2310286677908937, |
| "grad_norm": 0.3601983472885956, |
| "learning_rate": 3.2760774515927547e-05, |
| "loss": 0.3714, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.2327150084317031, |
| "grad_norm": 0.32216952225076373, |
| "learning_rate": 3.272954403497814e-05, |
| "loss": 0.3463, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.2344013490725128, |
| "grad_norm": 0.34789998458821336, |
| "learning_rate": 3.2698313554028734e-05, |
| "loss": 0.3606, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.2360876897133222, |
| "grad_norm": 0.3220946963248954, |
| "learning_rate": 3.2667083073079324e-05, |
| "loss": 0.3908, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.2377740303541316, |
| "grad_norm": 0.34525067270794385, |
| "learning_rate": 3.263585259212992e-05, |
| "loss": 0.3737, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.239460370994941, |
| "grad_norm": 0.363141368473777, |
| "learning_rate": 3.260462211118052e-05, |
| "loss": 0.3613, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.2411467116357504, |
| "grad_norm": 0.30845400363059916, |
| "learning_rate": 3.257339163023111e-05, |
| "loss": 0.3545, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.2428330522765598, |
| "grad_norm": 0.3292649941730577, |
| "learning_rate": 3.2542161149281705e-05, |
| "loss": 0.3637, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.2445193929173692, |
| "grad_norm": 0.3444162426809801, |
| "learning_rate": 3.2510930668332295e-05, |
| "loss": 0.3591, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.2462057335581789, |
| "grad_norm": 0.2780241415209115, |
| "learning_rate": 3.247970018738289e-05, |
| "loss": 0.3476, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.2478920741989883, |
| "grad_norm": 0.3310679629273186, |
| "learning_rate": 3.244846970643348e-05, |
| "loss": 0.3753, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.2495784148397977, |
| "grad_norm": 0.3228089052508038, |
| "learning_rate": 3.241723922548407e-05, |
| "loss": 0.3705, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.2512647554806071, |
| "grad_norm": 0.30123416686246896, |
| "learning_rate": 3.238600874453467e-05, |
| "loss": 0.3506, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.2529510961214165, |
| "grad_norm": 0.3047360398946294, |
| "learning_rate": 3.235477826358526e-05, |
| "loss": 0.3679, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.254637436762226, |
| "grad_norm": 0.28405410976343115, |
| "learning_rate": 3.2323547782635856e-05, |
| "loss": 0.3593, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.2563237774030354, |
| "grad_norm": 0.3220882220276252, |
| "learning_rate": 3.2292317301686446e-05, |
| "loss": 0.3575, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.258010118043845, |
| "grad_norm": 0.35944627744808794, |
| "learning_rate": 3.2261086820737043e-05, |
| "loss": 0.373, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.2596964586846542, |
| "grad_norm": 0.30281824632780585, |
| "learning_rate": 3.2229856339787634e-05, |
| "loss": 0.3681, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.2613827993254638, |
| "grad_norm": 0.353431345480095, |
| "learning_rate": 3.219862585883823e-05, |
| "loss": 0.3619, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.2630691399662732, |
| "grad_norm": 0.3055957478380962, |
| "learning_rate": 3.216739537788882e-05, |
| "loss": 0.3713, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.2647554806070826, |
| "grad_norm": 0.3103571679944118, |
| "learning_rate": 3.213616489693941e-05, |
| "loss": 0.3805, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.266441821247892, |
| "grad_norm": 0.3364232773283497, |
| "learning_rate": 3.210493441599001e-05, |
| "loss": 0.3804, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.2681281618887015, |
| "grad_norm": 0.27554897357070157, |
| "learning_rate": 3.20737039350406e-05, |
| "loss": 0.3524, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.269814502529511, |
| "grad_norm": 0.30387516090534045, |
| "learning_rate": 3.2042473454091195e-05, |
| "loss": 0.3543, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.2715008431703203, |
| "grad_norm": 0.3059496855372739, |
| "learning_rate": 3.2011242973141785e-05, |
| "loss": 0.3623, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.27318718381113, |
| "grad_norm": 0.30728724779696115, |
| "learning_rate": 3.198001249219238e-05, |
| "loss": 0.3668, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.2748735244519394, |
| "grad_norm": 0.3202085526337014, |
| "learning_rate": 3.194878201124297e-05, |
| "loss": 0.3671, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.2765598650927488, |
| "grad_norm": 0.32689712504751167, |
| "learning_rate": 3.191755153029357e-05, |
| "loss": 0.3695, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.2782462057335582, |
| "grad_norm": 0.30233880248863065, |
| "learning_rate": 3.188632104934416e-05, |
| "loss": 0.3736, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.2799325463743676, |
| "grad_norm": 0.32961492613846366, |
| "learning_rate": 3.185509056839475e-05, |
| "loss": 0.3684, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.281618887015177, |
| "grad_norm": 0.32713985472717844, |
| "learning_rate": 3.182386008744535e-05, |
| "loss": 0.3639, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.2833052276559864, |
| "grad_norm": 0.2686422618776032, |
| "learning_rate": 3.179262960649594e-05, |
| "loss": 0.3651, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.284991568296796, |
| "grad_norm": 0.33665097327652294, |
| "learning_rate": 3.176139912554654e-05, |
| "loss": 0.3858, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.2866779089376055, |
| "grad_norm": 0.29534168258311044, |
| "learning_rate": 3.173016864459713e-05, |
| "loss": 0.3507, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.2883642495784149, |
| "grad_norm": 0.35374323259618806, |
| "learning_rate": 3.169893816364772e-05, |
| "loss": 0.3906, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.2900505902192243, |
| "grad_norm": 0.3264610064516771, |
| "learning_rate": 3.166770768269832e-05, |
| "loss": 0.3904, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.2917369308600337, |
| "grad_norm": 0.2606629036110616, |
| "learning_rate": 3.163647720174891e-05, |
| "loss": 0.3625, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.2934232715008431, |
| "grad_norm": 0.31584649760135897, |
| "learning_rate": 3.1605246720799505e-05, |
| "loss": 0.3686, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.2951096121416525, |
| "grad_norm": 0.3223638084892562, |
| "learning_rate": 3.1574016239850095e-05, |
| "loss": 0.3461, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.2967959527824622, |
| "grad_norm": 0.28755849067927153, |
| "learning_rate": 3.154278575890069e-05, |
| "loss": 0.355, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.2984822934232714, |
| "grad_norm": 0.3088311800226945, |
| "learning_rate": 3.151155527795128e-05, |
| "loss": 0.3561, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.300168634064081, |
| "grad_norm": 0.3184212229471459, |
| "learning_rate": 3.148032479700188e-05, |
| "loss": 0.3685, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.3018549747048904, |
| "grad_norm": 0.3463641101847728, |
| "learning_rate": 3.144909431605247e-05, |
| "loss": 0.3785, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.3035413153456998, |
| "grad_norm": 0.3525193112827311, |
| "learning_rate": 3.141786383510306e-05, |
| "loss": 0.3558, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.3052276559865092, |
| "grad_norm": 0.4102862145029259, |
| "learning_rate": 3.1386633354153656e-05, |
| "loss": 0.3581, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.3069139966273187, |
| "grad_norm": 0.29347725599207186, |
| "learning_rate": 3.1355402873204246e-05, |
| "loss": 0.3696, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.3086003372681283, |
| "grad_norm": 0.3201690267050578, |
| "learning_rate": 3.132417239225484e-05, |
| "loss": 0.3699, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.3102866779089375, |
| "grad_norm": 0.42217077326951885, |
| "learning_rate": 3.129294191130543e-05, |
| "loss": 0.378, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.3119730185497471, |
| "grad_norm": 0.3204268838545862, |
| "learning_rate": 3.126171143035603e-05, |
| "loss": 0.3864, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.3136593591905565, |
| "grad_norm": 0.36927699806450415, |
| "learning_rate": 3.123048094940662e-05, |
| "loss": 0.3725, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.315345699831366, |
| "grad_norm": 0.36777202281156074, |
| "learning_rate": 3.119925046845722e-05, |
| "loss": 0.3478, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.3170320404721754, |
| "grad_norm": 0.332383376452764, |
| "learning_rate": 3.116801998750781e-05, |
| "loss": 0.3684, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.3187183811129848, |
| "grad_norm": 0.26968150427190296, |
| "learning_rate": 3.11367895065584e-05, |
| "loss": 0.3661, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.3204047217537942, |
| "grad_norm": 0.2757213475684918, |
| "learning_rate": 3.1105559025608995e-05, |
| "loss": 0.3619, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.3220910623946036, |
| "grad_norm": 0.291925824986569, |
| "learning_rate": 3.1074328544659585e-05, |
| "loss": 0.365, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.3237774030354132, |
| "grad_norm": 0.28827293963232314, |
| "learning_rate": 3.104309806371019e-05, |
| "loss": 0.3567, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.3254637436762227, |
| "grad_norm": 0.3094407833363561, |
| "learning_rate": 3.101186758276078e-05, |
| "loss": 0.377, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.327150084317032, |
| "grad_norm": 0.2805255608547809, |
| "learning_rate": 3.098063710181137e-05, |
| "loss": 0.345, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.3288364249578415, |
| "grad_norm": 0.27048687320912446, |
| "learning_rate": 3.0949406620861966e-05, |
| "loss": 0.3667, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.330522765598651, |
| "grad_norm": 0.27434964698292863, |
| "learning_rate": 3.0918176139912556e-05, |
| "loss": 0.3475, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.3322091062394603, |
| "grad_norm": 0.30584385870013375, |
| "learning_rate": 3.088694565896315e-05, |
| "loss": 0.379, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.3338954468802697, |
| "grad_norm": 0.2877913618660639, |
| "learning_rate": 3.085571517801374e-05, |
| "loss": 0.3764, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.3355817875210794, |
| "grad_norm": 0.26134414301494957, |
| "learning_rate": 3.082448469706434e-05, |
| "loss": 0.3654, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.3372681281618888, |
| "grad_norm": 0.3192558959887312, |
| "learning_rate": 3.079325421611493e-05, |
| "loss": 0.3627, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.3389544688026982, |
| "grad_norm": 0.2682907090974044, |
| "learning_rate": 3.076202373516553e-05, |
| "loss": 0.3631, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.3406408094435076, |
| "grad_norm": 0.312326995625596, |
| "learning_rate": 3.073079325421612e-05, |
| "loss": 0.3731, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.342327150084317, |
| "grad_norm": 0.36559281139328087, |
| "learning_rate": 3.069956277326671e-05, |
| "loss": 0.3483, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.3440134907251264, |
| "grad_norm": 0.2538475060279743, |
| "learning_rate": 3.0668332292317304e-05, |
| "loss": 0.3667, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.3456998313659359, |
| "grad_norm": 0.28040358277074, |
| "learning_rate": 3.0637101811367894e-05, |
| "loss": 0.3507, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.3473861720067455, |
| "grad_norm": 0.33938165368820494, |
| "learning_rate": 3.060587133041849e-05, |
| "loss": 0.3789, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.3490725126475547, |
| "grad_norm": 0.27797829986374323, |
| "learning_rate": 3.057464084946908e-05, |
| "loss": 0.3697, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.3507588532883643, |
| "grad_norm": 0.317311724784089, |
| "learning_rate": 3.054341036851968e-05, |
| "loss": 0.3712, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.3524451939291737, |
| "grad_norm": 0.27067284761984195, |
| "learning_rate": 3.051217988757027e-05, |
| "loss": 0.3642, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.3541315345699831, |
| "grad_norm": 0.27640495068779947, |
| "learning_rate": 3.0480949406620862e-05, |
| "loss": 0.3665, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.3558178752107926, |
| "grad_norm": 0.29925492356322403, |
| "learning_rate": 3.0449718925671456e-05, |
| "loss": 0.3606, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.357504215851602, |
| "grad_norm": 0.28679446055488445, |
| "learning_rate": 3.041848844472205e-05, |
| "loss": 0.3653, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.3591905564924116, |
| "grad_norm": 0.31685499931502714, |
| "learning_rate": 3.0387257963772643e-05, |
| "loss": 0.3578, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.3608768971332208, |
| "grad_norm": 0.31709583316510476, |
| "learning_rate": 3.0356027482823236e-05, |
| "loss": 0.3536, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.3625632377740304, |
| "grad_norm": 0.2972307323584074, |
| "learning_rate": 3.0324797001873826e-05, |
| "loss": 0.3453, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.3642495784148398, |
| "grad_norm": 0.2768215969932845, |
| "learning_rate": 3.029356652092442e-05, |
| "loss": 0.3731, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.3659359190556493, |
| "grad_norm": 0.3366256274766151, |
| "learning_rate": 3.0262336039975014e-05, |
| "loss": 0.3598, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.3676222596964587, |
| "grad_norm": 0.35895024828608924, |
| "learning_rate": 3.0231105559025614e-05, |
| "loss": 0.3728, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.369308600337268, |
| "grad_norm": 0.27047287184924756, |
| "learning_rate": 3.0199875078076207e-05, |
| "loss": 0.3557, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.3709949409780775, |
| "grad_norm": 0.3045253938750555, |
| "learning_rate": 3.01686445971268e-05, |
| "loss": 0.3763, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.372681281618887, |
| "grad_norm": 0.3269021401249622, |
| "learning_rate": 3.013741411617739e-05, |
| "loss": 0.3786, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.3743676222596966, |
| "grad_norm": 0.2989257893228668, |
| "learning_rate": 3.0106183635227985e-05, |
| "loss": 0.3549, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.376053962900506, |
| "grad_norm": 0.2902243430545122, |
| "learning_rate": 3.0074953154278578e-05, |
| "loss": 0.3605, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.3777403035413154, |
| "grad_norm": 0.3268375592923771, |
| "learning_rate": 3.0043722673329172e-05, |
| "loss": 0.3554, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.3794266441821248, |
| "grad_norm": 0.28706953208266983, |
| "learning_rate": 3.0012492192379765e-05, |
| "loss": 0.3671, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.3811129848229342, |
| "grad_norm": 0.3288664801504078, |
| "learning_rate": 2.998126171143036e-05, |
| "loss": 0.3681, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.3827993254637436, |
| "grad_norm": 0.2928303438562581, |
| "learning_rate": 2.9950031230480952e-05, |
| "loss": 0.3555, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.384485666104553, |
| "grad_norm": 0.2903552935720318, |
| "learning_rate": 2.9918800749531546e-05, |
| "loss": 0.3532, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.3861720067453627, |
| "grad_norm": 0.3321445685269924, |
| "learning_rate": 2.988757026858214e-05, |
| "loss": 0.3676, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.387858347386172, |
| "grad_norm": 0.2884554947709554, |
| "learning_rate": 2.985633978763273e-05, |
| "loss": 0.3747, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.3895446880269815, |
| "grad_norm": 0.276899181299804, |
| "learning_rate": 2.9825109306683323e-05, |
| "loss": 0.3731, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.391231028667791, |
| "grad_norm": 0.28516809217475486, |
| "learning_rate": 2.9793878825733917e-05, |
| "loss": 0.3644, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.3929173693086003, |
| "grad_norm": 0.3034709425045361, |
| "learning_rate": 2.976264834478451e-05, |
| "loss": 0.3656, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.3946037099494097, |
| "grad_norm": 0.274788776371271, |
| "learning_rate": 2.9731417863835104e-05, |
| "loss": 0.3477, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.3962900505902192, |
| "grad_norm": 0.3180401263617833, |
| "learning_rate": 2.9700187382885697e-05, |
| "loss": 0.3595, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.3979763912310288, |
| "grad_norm": 0.29836311693348744, |
| "learning_rate": 2.966895690193629e-05, |
| "loss": 0.3809, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.399662731871838, |
| "grad_norm": 0.3525823167547023, |
| "learning_rate": 2.9637726420986885e-05, |
| "loss": 0.3619, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.4013490725126476, |
| "grad_norm": 0.33175669157893434, |
| "learning_rate": 2.9606495940037475e-05, |
| "loss": 0.3588, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.403035413153457, |
| "grad_norm": 0.3245193475126911, |
| "learning_rate": 2.9575265459088068e-05, |
| "loss": 0.359, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.4047217537942664, |
| "grad_norm": 0.29657466500656987, |
| "learning_rate": 2.9544034978138662e-05, |
| "loss": 0.3465, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.4064080944350759, |
| "grad_norm": 0.3412473670858005, |
| "learning_rate": 2.9512804497189255e-05, |
| "loss": 0.3769, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.4080944350758853, |
| "grad_norm": 0.3216702109422503, |
| "learning_rate": 2.948157401623985e-05, |
| "loss": 0.3908, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.4097807757166947, |
| "grad_norm": 0.3250423656449163, |
| "learning_rate": 2.9450343535290442e-05, |
| "loss": 0.3485, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.411467116357504, |
| "grad_norm": 0.34791591826158985, |
| "learning_rate": 2.941911305434104e-05, |
| "loss": 0.3749, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.4131534569983137, |
| "grad_norm": 0.29886846786700416, |
| "learning_rate": 2.9387882573391633e-05, |
| "loss": 0.3707, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.4148397976391232, |
| "grad_norm": 0.43800507347744627, |
| "learning_rate": 2.9356652092442227e-05, |
| "loss": 0.3585, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.4165261382799326, |
| "grad_norm": 0.3370202282793368, |
| "learning_rate": 2.932542161149282e-05, |
| "loss": 0.3977, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.418212478920742, |
| "grad_norm": 0.3464248680319261, |
| "learning_rate": 2.9294191130543414e-05, |
| "loss": 0.3672, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.4198988195615514, |
| "grad_norm": 0.3329391344410722, |
| "learning_rate": 2.9262960649594007e-05, |
| "loss": 0.3797, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.4215851602023608, |
| "grad_norm": 0.3304159135003874, |
| "learning_rate": 2.92317301686446e-05, |
| "loss": 0.3667, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.4232715008431702, |
| "grad_norm": 0.33726375951261683, |
| "learning_rate": 2.9200499687695194e-05, |
| "loss": 0.3686, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.4249578414839799, |
| "grad_norm": 0.31457486052096356, |
| "learning_rate": 2.9169269206745788e-05, |
| "loss": 0.3549, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.4266441821247893, |
| "grad_norm": 0.313929594764878, |
| "learning_rate": 2.9138038725796378e-05, |
| "loss": 0.3455, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.4283305227655987, |
| "grad_norm": 0.35655405274540297, |
| "learning_rate": 2.910680824484697e-05, |
| "loss": 0.361, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.430016863406408, |
| "grad_norm": 0.3203018948600648, |
| "learning_rate": 2.9075577763897565e-05, |
| "loss": 0.356, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.4317032040472175, |
| "grad_norm": 0.31890031263924845, |
| "learning_rate": 2.904434728294816e-05, |
| "loss": 0.3861, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.433389544688027, |
| "grad_norm": 0.3439661347106031, |
| "learning_rate": 2.9013116801998752e-05, |
| "loss": 0.3691, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.4350758853288363, |
| "grad_norm": 0.3287855811474556, |
| "learning_rate": 2.8981886321049346e-05, |
| "loss": 0.3658, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.436762225969646, |
| "grad_norm": 0.3352337738607975, |
| "learning_rate": 2.895065584009994e-05, |
| "loss": 0.3526, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.4384485666104554, |
| "grad_norm": 0.29066523249557324, |
| "learning_rate": 2.8919425359150533e-05, |
| "loss": 0.3665, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.4401349072512648, |
| "grad_norm": 0.6443887765113684, |
| "learning_rate": 2.8888194878201126e-05, |
| "loss": 0.3695, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.4418212478920742, |
| "grad_norm": 0.32997585072997554, |
| "learning_rate": 2.8856964397251717e-05, |
| "loss": 0.354, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.4435075885328836, |
| "grad_norm": 0.2783806614720242, |
| "learning_rate": 2.882573391630231e-05, |
| "loss": 0.3849, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.445193929173693, |
| "grad_norm": 0.33474735936565325, |
| "learning_rate": 2.8794503435352904e-05, |
| "loss": 0.3781, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.4468802698145025, |
| "grad_norm": 0.3456639852483754, |
| "learning_rate": 2.8763272954403497e-05, |
| "loss": 0.3666, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.448566610455312, |
| "grad_norm": 0.30135371080737516, |
| "learning_rate": 2.873204247345409e-05, |
| "loss": 0.3629, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.4502529510961213, |
| "grad_norm": 0.3635410618935195, |
| "learning_rate": 2.8700811992504684e-05, |
| "loss": 0.3704, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.451939291736931, |
| "grad_norm": 0.30991001067763485, |
| "learning_rate": 2.8669581511555278e-05, |
| "loss": 0.3727, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.4536256323777403, |
| "grad_norm": 0.4643174111387868, |
| "learning_rate": 2.863835103060587e-05, |
| "loss": 0.3787, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.4553119730185498, |
| "grad_norm": 0.3180200407283694, |
| "learning_rate": 2.8607120549656468e-05, |
| "loss": 0.3602, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.4569983136593592, |
| "grad_norm": 0.35198422859049877, |
| "learning_rate": 2.8575890068707062e-05, |
| "loss": 0.3659, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.4586846543001686, |
| "grad_norm": 0.3959760640216589, |
| "learning_rate": 2.8544659587757655e-05, |
| "loss": 0.3642, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.460370994940978, |
| "grad_norm": 0.27646042574830654, |
| "learning_rate": 2.851342910680825e-05, |
| "loss": 0.3504, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.4620573355817874, |
| "grad_norm": 0.3229912287222973, |
| "learning_rate": 2.8482198625858843e-05, |
| "loss": 0.3537, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.463743676222597, |
| "grad_norm": 0.3308036371563188, |
| "learning_rate": 2.8450968144909436e-05, |
| "loss": 0.3672, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.4654300168634065, |
| "grad_norm": 0.3049717462902101, |
| "learning_rate": 2.8419737663960026e-05, |
| "loss": 0.3501, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.4671163575042159, |
| "grad_norm": 0.276903365881423, |
| "learning_rate": 2.838850718301062e-05, |
| "loss": 0.376, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.4688026981450253, |
| "grad_norm": 0.282087202469682, |
| "learning_rate": 2.8357276702061213e-05, |
| "loss": 0.3458, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.4704890387858347, |
| "grad_norm": 0.2889326442756206, |
| "learning_rate": 2.8326046221111807e-05, |
| "loss": 0.3635, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.4721753794266441, |
| "grad_norm": 0.263485140543098, |
| "learning_rate": 2.82948157401624e-05, |
| "loss": 0.3525, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.4738617200674535, |
| "grad_norm": 0.3017524454565209, |
| "learning_rate": 2.8263585259212994e-05, |
| "loss": 0.3569, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.4755480607082632, |
| "grad_norm": 0.2765623305014126, |
| "learning_rate": 2.8232354778263588e-05, |
| "loss": 0.3593, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.4772344013490726, |
| "grad_norm": 0.3025401132551647, |
| "learning_rate": 2.820112429731418e-05, |
| "loss": 0.3496, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.478920741989882, |
| "grad_norm": 0.30350708989703407, |
| "learning_rate": 2.8169893816364775e-05, |
| "loss": 0.3566, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.4806070826306914, |
| "grad_norm": 0.30107950706745196, |
| "learning_rate": 2.8138663335415365e-05, |
| "loss": 0.3884, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.4822934232715008, |
| "grad_norm": 0.3965538879616542, |
| "learning_rate": 2.810743285446596e-05, |
| "loss": 0.3762, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.4839797639123102, |
| "grad_norm": 0.26770539723536463, |
| "learning_rate": 2.8076202373516552e-05, |
| "loss": 0.3537, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.4856661045531196, |
| "grad_norm": 0.33976305682196745, |
| "learning_rate": 2.8044971892567145e-05, |
| "loss": 0.3651, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.4873524451939293, |
| "grad_norm": 0.2762542589057808, |
| "learning_rate": 2.801374141161774e-05, |
| "loss": 0.3553, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.4890387858347387, |
| "grad_norm": 0.29491397560474647, |
| "learning_rate": 2.7982510930668333e-05, |
| "loss": 0.3741, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.4907251264755481, |
| "grad_norm": 0.3313263751607482, |
| "learning_rate": 2.7951280449718926e-05, |
| "loss": 0.3624, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.4924114671163575, |
| "grad_norm": 0.40279897069448306, |
| "learning_rate": 2.792004996876952e-05, |
| "loss": 0.3854, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.494097807757167, |
| "grad_norm": 0.2995735918065522, |
| "learning_rate": 2.788881948782011e-05, |
| "loss": 0.3852, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.4957841483979764, |
| "grad_norm": 0.4071606342151084, |
| "learning_rate": 2.7857589006870703e-05, |
| "loss": 0.3486, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.4974704890387858, |
| "grad_norm": 0.3891142766885291, |
| "learning_rate": 2.7826358525921297e-05, |
| "loss": 0.3618, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.4991568296795954, |
| "grad_norm": 0.41252059927905615, |
| "learning_rate": 2.7795128044971897e-05, |
| "loss": 0.3861, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.5008431703204046, |
| "grad_norm": 0.37901924320659897, |
| "learning_rate": 2.776389756402249e-05, |
| "loss": 0.3659, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.5025295109612142, |
| "grad_norm": 0.36103653161805105, |
| "learning_rate": 2.7732667083073084e-05, |
| "loss": 0.3828, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.5042158516020236, |
| "grad_norm": 0.2973855077723278, |
| "learning_rate": 2.7701436602123674e-05, |
| "loss": 0.3786, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.505902192242833, |
| "grad_norm": 0.308201431273671, |
| "learning_rate": 2.7670206121174268e-05, |
| "loss": 0.3757, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.5075885328836425, |
| "grad_norm": 0.3157987216748116, |
| "learning_rate": 2.763897564022486e-05, |
| "loss": 0.3704, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.5092748735244519, |
| "grad_norm": 0.30359379634490863, |
| "learning_rate": 2.7607745159275455e-05, |
| "loss": 0.3746, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.5109612141652615, |
| "grad_norm": 0.31541367039281626, |
| "learning_rate": 2.757651467832605e-05, |
| "loss": 0.3537, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.5126475548060707, |
| "grad_norm": 0.28664704951464065, |
| "learning_rate": 2.7545284197376642e-05, |
| "loss": 0.3643, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.5143338954468804, |
| "grad_norm": 0.33654100602922427, |
| "learning_rate": 2.7514053716427236e-05, |
| "loss": 0.3874, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.5160202360876898, |
| "grad_norm": 0.3460798665887633, |
| "learning_rate": 2.748282323547783e-05, |
| "loss": 0.3696, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.5177065767284992, |
| "grad_norm": 0.29319829577254086, |
| "learning_rate": 2.7451592754528423e-05, |
| "loss": 0.3535, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.5193929173693086, |
| "grad_norm": 0.30053976752466616, |
| "learning_rate": 2.7420362273579013e-05, |
| "loss": 0.3768, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.521079258010118, |
| "grad_norm": 0.38472344036114325, |
| "learning_rate": 2.7389131792629607e-05, |
| "loss": 0.3668, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.5227655986509276, |
| "grad_norm": 0.2711714617087213, |
| "learning_rate": 2.73579013116802e-05, |
| "loss": 0.3408, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.5244519392917368, |
| "grad_norm": 0.2873701839245859, |
| "learning_rate": 2.7326670830730794e-05, |
| "loss": 0.3658, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.5261382799325465, |
| "grad_norm": 0.29970672448116314, |
| "learning_rate": 2.7295440349781387e-05, |
| "loss": 0.3649, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.5278246205733557, |
| "grad_norm": 0.30526313911571973, |
| "learning_rate": 2.726420986883198e-05, |
| "loss": 0.378, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.5295109612141653, |
| "grad_norm": 0.2928928752135304, |
| "learning_rate": 2.7232979387882574e-05, |
| "loss": 0.3757, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.5311973018549747, |
| "grad_norm": 0.2858263949745689, |
| "learning_rate": 2.7201748906933168e-05, |
| "loss": 0.3659, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.5328836424957841, |
| "grad_norm": 0.3087797513117411, |
| "learning_rate": 2.717051842598376e-05, |
| "loss": 0.3743, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.5345699831365935, |
| "grad_norm": 0.2856603026190315, |
| "learning_rate": 2.713928794503435e-05, |
| "loss": 0.357, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.536256323777403, |
| "grad_norm": 0.3058290048021126, |
| "learning_rate": 2.7108057464084945e-05, |
| "loss": 0.3734, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.5379426644182126, |
| "grad_norm": 0.3104946067619198, |
| "learning_rate": 2.707682698313554e-05, |
| "loss": 0.3568, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.5396290050590218, |
| "grad_norm": 0.26979396016908347, |
| "learning_rate": 2.7045596502186132e-05, |
| "loss": 0.3715, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.5413153456998314, |
| "grad_norm": 0.34743672943385967, |
| "learning_rate": 2.7014366021236733e-05, |
| "loss": 0.3527, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.5430016863406408, |
| "grad_norm": 0.30343903155975266, |
| "learning_rate": 2.6983135540287323e-05, |
| "loss": 0.3559, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.5446880269814502, |
| "grad_norm": 0.32417710032970565, |
| "learning_rate": 2.6951905059337916e-05, |
| "loss": 0.344, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.5463743676222597, |
| "grad_norm": 0.32794367697794385, |
| "learning_rate": 2.692067457838851e-05, |
| "loss": 0.3659, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.548060708263069, |
| "grad_norm": 0.3035455097790254, |
| "learning_rate": 2.6889444097439103e-05, |
| "loss": 0.3656, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.5497470489038787, |
| "grad_norm": 0.29476341967594083, |
| "learning_rate": 2.6858213616489697e-05, |
| "loss": 0.3605, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.551433389544688, |
| "grad_norm": 0.2507995893378595, |
| "learning_rate": 2.682698313554029e-05, |
| "loss": 0.3208, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.5531197301854975, |
| "grad_norm": 0.333329654380094, |
| "learning_rate": 2.6795752654590884e-05, |
| "loss": 0.3865, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.554806070826307, |
| "grad_norm": 0.28035363313722794, |
| "learning_rate": 2.6764522173641478e-05, |
| "loss": 0.3586, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.5564924114671164, |
| "grad_norm": 0.31443387396206257, |
| "learning_rate": 2.673329169269207e-05, |
| "loss": 0.3737, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.5581787521079258, |
| "grad_norm": 0.32875013176555307, |
| "learning_rate": 2.670206121174266e-05, |
| "loss": 0.3399, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.5598650927487352, |
| "grad_norm": 0.31629044557639807, |
| "learning_rate": 2.6670830730793255e-05, |
| "loss": 0.353, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.5615514333895448, |
| "grad_norm": 0.32486698411487475, |
| "learning_rate": 2.663960024984385e-05, |
| "loss": 0.3726, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.563237774030354, |
| "grad_norm": 0.25686723863440886, |
| "learning_rate": 2.6608369768894442e-05, |
| "loss": 0.3714, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.5649241146711637, |
| "grad_norm": 0.2994757555235764, |
| "learning_rate": 2.6577139287945035e-05, |
| "loss": 0.391, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.566610455311973, |
| "grad_norm": 0.26274982624954607, |
| "learning_rate": 2.654590880699563e-05, |
| "loss": 0.368, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.5682967959527825, |
| "grad_norm": 0.2766309841130026, |
| "learning_rate": 2.6514678326046223e-05, |
| "loss": 0.369, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.569983136593592, |
| "grad_norm": 0.24174006935555933, |
| "learning_rate": 2.6483447845096816e-05, |
| "loss": 0.346, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.5716694772344013, |
| "grad_norm": 0.2973914368046073, |
| "learning_rate": 2.645221736414741e-05, |
| "loss": 0.3509, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.573355817875211, |
| "grad_norm": 0.27142951583047953, |
| "learning_rate": 2.6420986883198e-05, |
| "loss": 0.371, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.5750421585160201, |
| "grad_norm": 0.28939375979112236, |
| "learning_rate": 2.6389756402248593e-05, |
| "loss": 0.3606, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.5767284991568298, |
| "grad_norm": 0.3204660725413403, |
| "learning_rate": 2.6358525921299187e-05, |
| "loss": 0.3719, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.578414839797639, |
| "grad_norm": 0.28518308791901786, |
| "learning_rate": 2.632729544034978e-05, |
| "loss": 0.3732, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.5801011804384486, |
| "grad_norm": 0.2840336033242113, |
| "learning_rate": 2.6296064959400374e-05, |
| "loss": 0.3523, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.581787521079258, |
| "grad_norm": 0.2848436033024478, |
| "learning_rate": 2.6264834478450968e-05, |
| "loss": 0.3613, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.5834738617200674, |
| "grad_norm": 0.30573983740828986, |
| "learning_rate": 2.623360399750156e-05, |
| "loss": 0.3639, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.5851602023608768, |
| "grad_norm": 0.3074017715880178, |
| "learning_rate": 2.6202373516552158e-05, |
| "loss": 0.3593, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.5868465430016863, |
| "grad_norm": 0.3670939060165031, |
| "learning_rate": 2.617114303560275e-05, |
| "loss": 0.3732, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.588532883642496, |
| "grad_norm": 0.2815186616157421, |
| "learning_rate": 2.6139912554653345e-05, |
| "loss": 0.376, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.590219224283305, |
| "grad_norm": 0.3288959283244572, |
| "learning_rate": 2.610868207370394e-05, |
| "loss": 0.3859, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.5919055649241147, |
| "grad_norm": 0.3333526511910872, |
| "learning_rate": 2.6077451592754532e-05, |
| "loss": 0.3476, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.5935919055649241, |
| "grad_norm": 0.26153190652604347, |
| "learning_rate": 2.6046221111805126e-05, |
| "loss": 0.3527, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.5952782462057336, |
| "grad_norm": 0.3352855943625819, |
| "learning_rate": 2.601499063085572e-05, |
| "loss": 0.3491, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.596964586846543, |
| "grad_norm": 0.34129225800110075, |
| "learning_rate": 2.598376014990631e-05, |
| "loss": 0.3815, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.5986509274873524, |
| "grad_norm": 0.27613299041658507, |
| "learning_rate": 2.5952529668956903e-05, |
| "loss": 0.371, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.600337268128162, |
| "grad_norm": 0.3466164543787536, |
| "learning_rate": 2.5921299188007497e-05, |
| "loss": 0.3599, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.6020236087689712, |
| "grad_norm": 0.2887317810997904, |
| "learning_rate": 2.589006870705809e-05, |
| "loss": 0.3693, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.6037099494097808, |
| "grad_norm": 0.27325392270711873, |
| "learning_rate": 2.5858838226108684e-05, |
| "loss": 0.3566, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.6053962900505903, |
| "grad_norm": 0.29154218812604826, |
| "learning_rate": 2.5827607745159277e-05, |
| "loss": 0.3656, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.6070826306913997, |
| "grad_norm": 0.27216759043144917, |
| "learning_rate": 2.579637726420987e-05, |
| "loss": 0.3663, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.608768971332209, |
| "grad_norm": 0.24444581005960203, |
| "learning_rate": 2.5765146783260464e-05, |
| "loss": 0.3463, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.6104553119730185, |
| "grad_norm": 0.291335248047702, |
| "learning_rate": 2.5733916302311058e-05, |
| "loss": 0.3697, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.6121416526138281, |
| "grad_norm": 0.2675512533134804, |
| "learning_rate": 2.5702685821361648e-05, |
| "loss": 0.3609, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.6138279932546373, |
| "grad_norm": 0.34548920343457473, |
| "learning_rate": 2.567145534041224e-05, |
| "loss": 0.3699, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.615514333895447, |
| "grad_norm": 0.2742740366615913, |
| "learning_rate": 2.5640224859462835e-05, |
| "loss": 0.3702, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.6172006745362564, |
| "grad_norm": 0.37519505367106626, |
| "learning_rate": 2.560899437851343e-05, |
| "loss": 0.3585, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.6188870151770658, |
| "grad_norm": 0.2752354411311124, |
| "learning_rate": 2.5577763897564022e-05, |
| "loss": 0.3678, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.6205733558178752, |
| "grad_norm": 0.26159230336481026, |
| "learning_rate": 2.5546533416614616e-05, |
| "loss": 0.3481, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.6222596964586846, |
| "grad_norm": 0.29194704056578963, |
| "learning_rate": 2.551530293566521e-05, |
| "loss": 0.3677, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.6239460370994943, |
| "grad_norm": 0.2824926837708255, |
| "learning_rate": 2.5484072454715803e-05, |
| "loss": 0.3802, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.6256323777403034, |
| "grad_norm": 0.26074933605007267, |
| "learning_rate": 2.5452841973766396e-05, |
| "loss": 0.3593, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.627318718381113, |
| "grad_norm": 0.27358390071273747, |
| "learning_rate": 2.5421611492816987e-05, |
| "loss": 0.3634, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.6290050590219223, |
| "grad_norm": 0.26051652985808404, |
| "learning_rate": 2.5390381011867587e-05, |
| "loss": 0.3786, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.630691399662732, |
| "grad_norm": 0.27351362935955126, |
| "learning_rate": 2.535915053091818e-05, |
| "loss": 0.3709, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.6323777403035413, |
| "grad_norm": 0.2730193534490263, |
| "learning_rate": 2.5327920049968774e-05, |
| "loss": 0.3523, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.6340640809443507, |
| "grad_norm": 0.2713613178933639, |
| "learning_rate": 2.5296689569019368e-05, |
| "loss": 0.3754, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.6357504215851602, |
| "grad_norm": 0.2913211147093276, |
| "learning_rate": 2.5265459088069958e-05, |
| "loss": 0.3641, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.6374367622259696, |
| "grad_norm": 0.3176807011629316, |
| "learning_rate": 2.523422860712055e-05, |
| "loss": 0.3644, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.6391231028667792, |
| "grad_norm": 0.2719022706726167, |
| "learning_rate": 2.5202998126171145e-05, |
| "loss": 0.3456, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.6408094435075884, |
| "grad_norm": 0.28072788443759994, |
| "learning_rate": 2.517176764522174e-05, |
| "loss": 0.3569, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.642495784148398, |
| "grad_norm": 0.3402154003990933, |
| "learning_rate": 2.5140537164272332e-05, |
| "loss": 0.3595, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.6441821247892074, |
| "grad_norm": 0.29326577548976807, |
| "learning_rate": 2.5109306683322925e-05, |
| "loss": 0.3692, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.6458684654300169, |
| "grad_norm": 0.2851064269963646, |
| "learning_rate": 2.507807620237352e-05, |
| "loss": 0.3476, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.6475548060708263, |
| "grad_norm": 0.30308588717737617, |
| "learning_rate": 2.5046845721424113e-05, |
| "loss": 0.3528, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.6492411467116357, |
| "grad_norm": 0.30446422773851584, |
| "learning_rate": 2.5015615240474706e-05, |
| "loss": 0.3791, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.6509274873524453, |
| "grad_norm": 0.31209376681317047, |
| "learning_rate": 2.4984384759525296e-05, |
| "loss": 0.3726, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.6526138279932545, |
| "grad_norm": 0.2957213372732873, |
| "learning_rate": 2.495315427857589e-05, |
| "loss": 0.3634, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.6543001686340641, |
| "grad_norm": 0.2506278259092563, |
| "learning_rate": 2.4921923797626483e-05, |
| "loss": 0.3562, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.6559865092748736, |
| "grad_norm": 0.26991031572885404, |
| "learning_rate": 2.4890693316677077e-05, |
| "loss": 0.3684, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.657672849915683, |
| "grad_norm": 0.26827169125638395, |
| "learning_rate": 2.485946283572767e-05, |
| "loss": 0.382, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.6593591905564924, |
| "grad_norm": 0.25236793433775045, |
| "learning_rate": 2.4828232354778264e-05, |
| "loss": 0.3781, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.6610455311973018, |
| "grad_norm": 0.2597056655240759, |
| "learning_rate": 2.479700187382886e-05, |
| "loss": 0.358, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.6627318718381114, |
| "grad_norm": 0.26812083288360683, |
| "learning_rate": 2.476577139287945e-05, |
| "loss": 0.3714, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.6644182124789206, |
| "grad_norm": 0.25998888449376045, |
| "learning_rate": 2.4734540911930045e-05, |
| "loss": 0.3558, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.6661045531197303, |
| "grad_norm": 0.33214867440974927, |
| "learning_rate": 2.4703310430980638e-05, |
| "loss": 0.3663, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.6677908937605397, |
| "grad_norm": 0.2787721464162817, |
| "learning_rate": 2.4672079950031232e-05, |
| "loss": 0.3661, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.669477234401349, |
| "grad_norm": 0.29617420303436354, |
| "learning_rate": 2.4640849469081825e-05, |
| "loss": 0.3665, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.6711635750421585, |
| "grad_norm": 0.32312615723880084, |
| "learning_rate": 2.460961898813242e-05, |
| "loss": 0.3981, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.672849915682968, |
| "grad_norm": 0.24323896852960228, |
| "learning_rate": 2.4578388507183012e-05, |
| "loss": 0.3579, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.6745362563237776, |
| "grad_norm": 0.3218908678873722, |
| "learning_rate": 2.4547158026233606e-05, |
| "loss": 0.3884, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.6762225969645868, |
| "grad_norm": 0.27355257653083903, |
| "learning_rate": 2.45159275452842e-05, |
| "loss": 0.3678, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.6779089376053964, |
| "grad_norm": 0.2548595173625908, |
| "learning_rate": 2.448469706433479e-05, |
| "loss": 0.3621, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.6795952782462056, |
| "grad_norm": 0.2671433617750058, |
| "learning_rate": 2.4453466583385383e-05, |
| "loss": 0.3793, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.6812816188870152, |
| "grad_norm": 0.26834322594291415, |
| "learning_rate": 2.4422236102435977e-05, |
| "loss": 0.3736, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.6829679595278246, |
| "grad_norm": 0.2603248087469291, |
| "learning_rate": 2.4391005621486574e-05, |
| "loss": 0.3705, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.684654300168634, |
| "grad_norm": 0.2775103849331025, |
| "learning_rate": 2.4359775140537167e-05, |
| "loss": 0.3632, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.6863406408094435, |
| "grad_norm": 0.2758959222329701, |
| "learning_rate": 2.432854465958776e-05, |
| "loss": 0.3818, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.6880269814502529, |
| "grad_norm": 0.273604462240355, |
| "learning_rate": 2.4297314178638354e-05, |
| "loss": 0.3631, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.6897133220910625, |
| "grad_norm": 0.33729218246704507, |
| "learning_rate": 2.4266083697688945e-05, |
| "loss": 0.3783, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.6913996627318717, |
| "grad_norm": 0.2663973993344779, |
| "learning_rate": 2.4234853216739538e-05, |
| "loss": 0.3487, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.6930860033726813, |
| "grad_norm": 0.33574629676201084, |
| "learning_rate": 2.420362273579013e-05, |
| "loss": 0.3686, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.6947723440134908, |
| "grad_norm": 0.2965945796608682, |
| "learning_rate": 2.4172392254840725e-05, |
| "loss": 0.3659, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.6964586846543002, |
| "grad_norm": 0.2832725819779564, |
| "learning_rate": 2.414116177389132e-05, |
| "loss": 0.3719, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.6981450252951096, |
| "grad_norm": 0.2660921300880089, |
| "learning_rate": 2.4109931292941912e-05, |
| "loss": 0.3627, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.699831365935919, |
| "grad_norm": 0.2328821109494059, |
| "learning_rate": 2.4078700811992506e-05, |
| "loss": 0.363, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.7015177065767286, |
| "grad_norm": 0.26673695583540513, |
| "learning_rate": 2.40474703310431e-05, |
| "loss": 0.3613, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.7032040472175378, |
| "grad_norm": 0.2593262439807181, |
| "learning_rate": 2.4016239850093693e-05, |
| "loss": 0.3667, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.7048903878583475, |
| "grad_norm": 0.27590667527210233, |
| "learning_rate": 2.3985009369144286e-05, |
| "loss": 0.3699, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.7065767284991569, |
| "grad_norm": 0.2763140503235593, |
| "learning_rate": 2.395377888819488e-05, |
| "loss": 0.371, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.7082630691399663, |
| "grad_norm": 0.2578085278220634, |
| "learning_rate": 2.3922548407245474e-05, |
| "loss": 0.3474, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.7099494097807757, |
| "grad_norm": 0.2761385598378717, |
| "learning_rate": 2.3891317926296067e-05, |
| "loss": 0.3625, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.7116357504215851, |
| "grad_norm": 0.27597852351892443, |
| "learning_rate": 2.386008744534666e-05, |
| "loss": 0.3747, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.7133220910623947, |
| "grad_norm": 0.2982888554929235, |
| "learning_rate": 2.3828856964397254e-05, |
| "loss": 0.3596, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.715008431703204, |
| "grad_norm": 0.2614376301845927, |
| "learning_rate": 2.3797626483447848e-05, |
| "loss": 0.3537, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.7166947723440136, |
| "grad_norm": 1.3930221257075153, |
| "learning_rate": 2.3766396002498438e-05, |
| "loss": 0.4185, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.718381112984823, |
| "grad_norm": 0.3795613329435081, |
| "learning_rate": 2.373516552154903e-05, |
| "loss": 0.3562, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.7200674536256324, |
| "grad_norm": 0.3056080974956922, |
| "learning_rate": 2.3703935040599625e-05, |
| "loss": 0.3649, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.7217537942664418, |
| "grad_norm": 0.4000237187674837, |
| "learning_rate": 2.367270455965022e-05, |
| "loss": 0.3667, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.7234401349072512, |
| "grad_norm": 0.33463335739266226, |
| "learning_rate": 2.3641474078700812e-05, |
| "loss": 0.3564, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.7251264755480609, |
| "grad_norm": 0.3485535996212191, |
| "learning_rate": 2.3610243597751406e-05, |
| "loss": 0.3786, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.72681281618887, |
| "grad_norm": 0.3987241043836055, |
| "learning_rate": 2.3579013116802003e-05, |
| "loss": 0.3687, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.7284991568296797, |
| "grad_norm": 0.3754341036793299, |
| "learning_rate": 2.3547782635852593e-05, |
| "loss": 0.3746, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.7301854974704889, |
| "grad_norm": 0.29602199276671504, |
| "learning_rate": 2.3516552154903186e-05, |
| "loss": 0.376, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.7318718381112985, |
| "grad_norm": 0.2863523824664361, |
| "learning_rate": 2.348532167395378e-05, |
| "loss": 0.366, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.733558178752108, |
| "grad_norm": 0.3706051157797791, |
| "learning_rate": 2.3454091193004373e-05, |
| "loss": 0.3653, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.7352445193929174, |
| "grad_norm": 0.29526649266114907, |
| "learning_rate": 2.3422860712054967e-05, |
| "loss": 0.3693, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.7369308600337268, |
| "grad_norm": 0.3305499627842152, |
| "learning_rate": 2.339163023110556e-05, |
| "loss": 0.3703, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.7386172006745362, |
| "grad_norm": 0.3222780067528204, |
| "learning_rate": 2.3360399750156154e-05, |
| "loss": 0.3448, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.7403035413153458, |
| "grad_norm": 0.311694614643851, |
| "learning_rate": 2.3329169269206748e-05, |
| "loss": 0.3601, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.741989881956155, |
| "grad_norm": 0.27323544168500447, |
| "learning_rate": 2.329793878825734e-05, |
| "loss": 0.3603, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.7436762225969646, |
| "grad_norm": 0.3555613719712465, |
| "learning_rate": 2.326670830730793e-05, |
| "loss": 0.3685, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.745362563237774, |
| "grad_norm": 0.33155498613569256, |
| "learning_rate": 2.3235477826358525e-05, |
| "loss": 0.3545, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.7470489038785835, |
| "grad_norm": 0.2791464988529287, |
| "learning_rate": 2.320424734540912e-05, |
| "loss": 0.3726, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.7487352445193929, |
| "grad_norm": 0.3422453820402333, |
| "learning_rate": 2.3173016864459715e-05, |
| "loss": 0.3531, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.7504215851602023, |
| "grad_norm": 0.29329487796063386, |
| "learning_rate": 2.314178638351031e-05, |
| "loss": 0.3534, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.752107925801012, |
| "grad_norm": 0.29710324626541224, |
| "learning_rate": 2.3110555902560902e-05, |
| "loss": 0.3697, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.7537942664418211, |
| "grad_norm": 0.2939041175358189, |
| "learning_rate": 2.3079325421611496e-05, |
| "loss": 0.3869, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.7554806070826308, |
| "grad_norm": 0.34204566971638944, |
| "learning_rate": 2.3048094940662086e-05, |
| "loss": 0.3587, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.7571669477234402, |
| "grad_norm": 0.31682768420537266, |
| "learning_rate": 2.301686445971268e-05, |
| "loss": 0.353, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.7588532883642496, |
| "grad_norm": 0.29485949755465213, |
| "learning_rate": 2.2985633978763273e-05, |
| "loss": 0.3617, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.760539629005059, |
| "grad_norm": 0.3475504060072616, |
| "learning_rate": 2.2954403497813867e-05, |
| "loss": 0.37, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.7622259696458684, |
| "grad_norm": 0.2922791776067849, |
| "learning_rate": 2.292317301686446e-05, |
| "loss": 0.3627, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.763912310286678, |
| "grad_norm": 1.1149661206969306, |
| "learning_rate": 2.2891942535915054e-05, |
| "loss": 0.39, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.7655986509274872, |
| "grad_norm": 0.29850221752863587, |
| "learning_rate": 2.2860712054965647e-05, |
| "loss": 0.3648, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.7672849915682969, |
| "grad_norm": 0.28451789499029356, |
| "learning_rate": 2.282948157401624e-05, |
| "loss": 0.3665, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.768971332209106, |
| "grad_norm": 0.279381167685199, |
| "learning_rate": 2.2798251093066835e-05, |
| "loss": 0.353, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.7706576728499157, |
| "grad_norm": 0.3111591163555094, |
| "learning_rate": 2.2767020612117428e-05, |
| "loss": 0.3564, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.7723440134907251, |
| "grad_norm": 0.3282013878697267, |
| "learning_rate": 2.273579013116802e-05, |
| "loss": 0.3592, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.7740303541315345, |
| "grad_norm": 0.2743211155501384, |
| "learning_rate": 2.2704559650218615e-05, |
| "loss": 0.3772, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.7757166947723442, |
| "grad_norm": 0.3068658384779893, |
| "learning_rate": 2.267332916926921e-05, |
| "loss": 0.3525, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.7774030354131534, |
| "grad_norm": 0.3821620228832803, |
| "learning_rate": 2.2642098688319802e-05, |
| "loss": 0.3868, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.779089376053963, |
| "grad_norm": 0.2984635934516873, |
| "learning_rate": 2.2610868207370396e-05, |
| "loss": 0.3601, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.7807757166947722, |
| "grad_norm": 0.30801597653766255, |
| "learning_rate": 2.257963772642099e-05, |
| "loss": 0.3714, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.7824620573355818, |
| "grad_norm": 0.3596227923255594, |
| "learning_rate": 2.254840724547158e-05, |
| "loss": 0.3727, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.7841483979763912, |
| "grad_norm": 0.32033504944868557, |
| "learning_rate": 2.2517176764522173e-05, |
| "loss": 0.3731, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.7858347386172007, |
| "grad_norm": 0.2953834413434869, |
| "learning_rate": 2.2485946283572767e-05, |
| "loss": 0.3565, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.78752107925801, |
| "grad_norm": 0.33100873485129334, |
| "learning_rate": 2.245471580262336e-05, |
| "loss": 0.3672, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.7892074198988195, |
| "grad_norm": 0.28300168443811613, |
| "learning_rate": 2.2423485321673954e-05, |
| "loss": 0.3453, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.7908937605396291, |
| "grad_norm": 0.26796245492445664, |
| "learning_rate": 2.2392254840724547e-05, |
| "loss": 0.3421, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.7925801011804383, |
| "grad_norm": 0.28436317611406275, |
| "learning_rate": 2.2361024359775144e-05, |
| "loss": 0.3685, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.794266441821248, |
| "grad_norm": 0.3044813972327036, |
| "learning_rate": 2.2329793878825734e-05, |
| "loss": 0.3761, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.7959527824620574, |
| "grad_norm": 0.2921388581676714, |
| "learning_rate": 2.2298563397876328e-05, |
| "loss": 0.3645, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.7976391231028668, |
| "grad_norm": 0.29626567597084796, |
| "learning_rate": 2.226733291692692e-05, |
| "loss": 0.3482, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.7993254637436762, |
| "grad_norm": 0.27624975007783753, |
| "learning_rate": 2.2236102435977515e-05, |
| "loss": 0.3495, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.8010118043844856, |
| "grad_norm": 0.28624573915638696, |
| "learning_rate": 2.220487195502811e-05, |
| "loss": 0.3599, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.8026981450252952, |
| "grad_norm": 0.2841114493990201, |
| "learning_rate": 2.2173641474078702e-05, |
| "loss": 0.3539, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.8043844856661044, |
| "grad_norm": 0.31601341314333714, |
| "learning_rate": 2.2142410993129296e-05, |
| "loss": 0.3411, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.806070826306914, |
| "grad_norm": 0.25856361379287507, |
| "learning_rate": 2.211118051217989e-05, |
| "loss": 0.3566, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.8077571669477235, |
| "grad_norm": 0.3116983391184263, |
| "learning_rate": 2.2079950031230483e-05, |
| "loss": 0.3679, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.809443507588533, |
| "grad_norm": 0.32306943398684707, |
| "learning_rate": 2.2048719550281073e-05, |
| "loss": 0.3678, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.8111298482293423, |
| "grad_norm": 0.2925451620866593, |
| "learning_rate": 2.2017489069331666e-05, |
| "loss": 0.3672, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.8128161888701517, |
| "grad_norm": 0.3335180580412719, |
| "learning_rate": 2.198625858838226e-05, |
| "loss": 0.3593, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.8145025295109614, |
| "grad_norm": 0.2895810667226106, |
| "learning_rate": 2.1955028107432857e-05, |
| "loss": 0.3521, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.8161888701517706, |
| "grad_norm": 0.2815963912814418, |
| "learning_rate": 2.192379762648345e-05, |
| "loss": 0.3632, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.8178752107925802, |
| "grad_norm": 0.2868864309832322, |
| "learning_rate": 2.1892567145534044e-05, |
| "loss": 0.3489, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.8195615514333894, |
| "grad_norm": 0.29441658382664976, |
| "learning_rate": 2.1861336664584638e-05, |
| "loss": 0.3657, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.821247892074199, |
| "grad_norm": 0.31297489563548325, |
| "learning_rate": 2.1830106183635228e-05, |
| "loss": 0.3504, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.8229342327150084, |
| "grad_norm": 0.27799837753213885, |
| "learning_rate": 2.179887570268582e-05, |
| "loss": 0.3689, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.8246205733558178, |
| "grad_norm": 0.28878031053711806, |
| "learning_rate": 2.1767645221736415e-05, |
| "loss": 0.3571, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.8263069139966275, |
| "grad_norm": 0.2932352697387466, |
| "learning_rate": 2.173641474078701e-05, |
| "loss": 0.3587, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.8279932546374367, |
| "grad_norm": 0.2685486954287236, |
| "learning_rate": 2.1705184259837602e-05, |
| "loss": 0.3835, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.8296795952782463, |
| "grad_norm": 0.2909854441326301, |
| "learning_rate": 2.1673953778888196e-05, |
| "loss": 0.3485, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.8313659359190555, |
| "grad_norm": 0.29465094304956874, |
| "learning_rate": 2.164272329793879e-05, |
| "loss": 0.3682, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.8330522765598651, |
| "grad_norm": 0.2707187489413735, |
| "learning_rate": 2.1611492816989383e-05, |
| "loss": 0.3736, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.8347386172006745, |
| "grad_norm": 0.30564282864344905, |
| "learning_rate": 2.1580262336039976e-05, |
| "loss": 0.3636, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.836424957841484, |
| "grad_norm": 0.8763246852809086, |
| "learning_rate": 2.154903185509057e-05, |
| "loss": 0.4001, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.8381112984822934, |
| "grad_norm": 0.2671171226002439, |
| "learning_rate": 2.1517801374141163e-05, |
| "loss": 0.3456, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.8397976391231028, |
| "grad_norm": 0.2820484363666512, |
| "learning_rate": 2.1486570893191757e-05, |
| "loss": 0.3737, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.8414839797639124, |
| "grad_norm": 0.2829569572495032, |
| "learning_rate": 2.145534041224235e-05, |
| "loss": 0.373, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.8431703204047216, |
| "grad_norm": 0.2655606395124247, |
| "learning_rate": 2.1424109931292944e-05, |
| "loss": 0.3513, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.8448566610455313, |
| "grad_norm": 0.30954298285785775, |
| "learning_rate": 2.1392879450343537e-05, |
| "loss": 0.3697, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.8465430016863407, |
| "grad_norm": 0.2735055996514486, |
| "learning_rate": 2.136164896939413e-05, |
| "loss": 0.343, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.84822934232715, |
| "grad_norm": 0.2743003208816105, |
| "learning_rate": 2.133041848844472e-05, |
| "loss": 0.3511, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.8499156829679595, |
| "grad_norm": 0.29353957623248444, |
| "learning_rate": 2.1299188007495315e-05, |
| "loss": 0.3721, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.851602023608769, |
| "grad_norm": 0.2908904768338592, |
| "learning_rate": 2.1267957526545908e-05, |
| "loss": 0.3653, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.8532883642495785, |
| "grad_norm": 0.26714340843376727, |
| "learning_rate": 2.1236727045596502e-05, |
| "loss": 0.3584, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.8549747048903877, |
| "grad_norm": 0.33059642792892746, |
| "learning_rate": 2.1205496564647095e-05, |
| "loss": 0.382, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.8566610455311974, |
| "grad_norm": 0.2894330220537914, |
| "learning_rate": 2.117426608369769e-05, |
| "loss": 0.3672, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.8583473861720068, |
| "grad_norm": 0.3197462143084355, |
| "learning_rate": 2.1143035602748286e-05, |
| "loss": 0.3898, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.8600337268128162, |
| "grad_norm": 0.27355447071343325, |
| "learning_rate": 2.1111805121798876e-05, |
| "loss": 0.3719, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.8617200674536256, |
| "grad_norm": 0.30736784785079163, |
| "learning_rate": 2.108057464084947e-05, |
| "loss": 0.3761, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.863406408094435, |
| "grad_norm": 0.260778455212016, |
| "learning_rate": 2.1049344159900063e-05, |
| "loss": 0.3547, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.8650927487352447, |
| "grad_norm": 0.27879425279471226, |
| "learning_rate": 2.1018113678950657e-05, |
| "loss": 0.3586, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.8667790893760539, |
| "grad_norm": 0.25377663369915016, |
| "learning_rate": 2.098688319800125e-05, |
| "loss": 0.3592, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.8684654300168635, |
| "grad_norm": 0.3065184311266967, |
| "learning_rate": 2.0955652717051844e-05, |
| "loss": 0.3531, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.8701517706576727, |
| "grad_norm": 0.27223437514527493, |
| "learning_rate": 2.0924422236102437e-05, |
| "loss": 0.3662, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.8718381112984823, |
| "grad_norm": 0.29624318973193403, |
| "learning_rate": 2.089319175515303e-05, |
| "loss": 0.3584, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.8735244519392917, |
| "grad_norm": 0.25284920706159414, |
| "learning_rate": 2.0861961274203624e-05, |
| "loss": 0.3542, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.8752107925801011, |
| "grad_norm": 0.2740076078757696, |
| "learning_rate": 2.0830730793254215e-05, |
| "loss": 0.3511, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.8768971332209108, |
| "grad_norm": 0.296603469029254, |
| "learning_rate": 2.0799500312304808e-05, |
| "loss": 0.3593, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.87858347386172, |
| "grad_norm": 0.3101924174752339, |
| "learning_rate": 2.07682698313554e-05, |
| "loss": 0.3629, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.8802698145025296, |
| "grad_norm": 0.2698704731333751, |
| "learning_rate": 2.0737039350406e-05, |
| "loss": 0.3511, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.8819561551433388, |
| "grad_norm": 0.2495696479572268, |
| "learning_rate": 2.0705808869456592e-05, |
| "loss": 0.3623, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.8836424957841484, |
| "grad_norm": 0.3025959336361255, |
| "learning_rate": 2.0674578388507186e-05, |
| "loss": 0.3632, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.8853288364249579, |
| "grad_norm": 0.3420981543369168, |
| "learning_rate": 2.064334790755778e-05, |
| "loss": 0.3561, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.8870151770657673, |
| "grad_norm": 0.273069642295159, |
| "learning_rate": 2.061211742660837e-05, |
| "loss": 0.3574, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.8887015177065767, |
| "grad_norm": 0.30195405718159296, |
| "learning_rate": 2.0580886945658963e-05, |
| "loss": 0.3565, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.890387858347386, |
| "grad_norm": 0.3207160807438318, |
| "learning_rate": 2.0549656464709557e-05, |
| "loss": 0.3706, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.8920741989881957, |
| "grad_norm": 0.27432257864150217, |
| "learning_rate": 2.051842598376015e-05, |
| "loss": 0.3692, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.893760539629005, |
| "grad_norm": 0.2803399274639525, |
| "learning_rate": 2.0487195502810744e-05, |
| "loss": 0.3774, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.8954468802698146, |
| "grad_norm": 0.274785479790295, |
| "learning_rate": 2.0455965021861337e-05, |
| "loss": 0.3572, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.897133220910624, |
| "grad_norm": 0.2919292690535732, |
| "learning_rate": 2.042473454091193e-05, |
| "loss": 0.3589, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.8988195615514334, |
| "grad_norm": 0.2493810185630252, |
| "learning_rate": 2.0393504059962524e-05, |
| "loss": 0.3666, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.9005059021922428, |
| "grad_norm": 0.3122422734266926, |
| "learning_rate": 2.0362273579013118e-05, |
| "loss": 0.3687, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.9021922428330522, |
| "grad_norm": 0.2833644627593553, |
| "learning_rate": 2.033104309806371e-05, |
| "loss": 0.3721, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.9038785834738619, |
| "grad_norm": 0.24682430613475206, |
| "learning_rate": 2.0299812617114305e-05, |
| "loss": 0.3481, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.905564924114671, |
| "grad_norm": 0.29373193111963974, |
| "learning_rate": 2.02685821361649e-05, |
| "loss": 0.3652, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.9072512647554807, |
| "grad_norm": 0.2488116465798772, |
| "learning_rate": 2.0237351655215492e-05, |
| "loss": 0.348, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.90893760539629, |
| "grad_norm": 0.29044014614137137, |
| "learning_rate": 2.0206121174266086e-05, |
| "loss": 0.3393, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.9106239460370995, |
| "grad_norm": 0.3110171656934698, |
| "learning_rate": 2.017489069331668e-05, |
| "loss": 0.3862, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.912310286677909, |
| "grad_norm": 0.2580764691582828, |
| "learning_rate": 2.0143660212367273e-05, |
| "loss": 0.3638, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.9139966273187183, |
| "grad_norm": 0.2792732755590291, |
| "learning_rate": 2.0112429731417863e-05, |
| "loss": 0.337, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.915682967959528, |
| "grad_norm": 0.29912970237260444, |
| "learning_rate": 2.0081199250468456e-05, |
| "loss": 0.3644, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.9173693086003372, |
| "grad_norm": 0.25447150765707793, |
| "learning_rate": 2.004996876951905e-05, |
| "loss": 0.3513, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.9190556492411468, |
| "grad_norm": 0.2747588787195848, |
| "learning_rate": 2.0018738288569643e-05, |
| "loss": 0.3719, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.920741989881956, |
| "grad_norm": 0.2540705730030083, |
| "learning_rate": 1.9987507807620237e-05, |
| "loss": 0.3551, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.9224283305227656, |
| "grad_norm": 0.2417593824707377, |
| "learning_rate": 1.9956277326670834e-05, |
| "loss": 0.3738, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.924114671163575, |
| "grad_norm": 0.23181361516056662, |
| "learning_rate": 1.9925046845721428e-05, |
| "loss": 0.3472, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.9258010118043845, |
| "grad_norm": 0.30125286349549063, |
| "learning_rate": 1.989381636477202e-05, |
| "loss": 0.3662, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.927487352445194, |
| "grad_norm": 0.2504368430085182, |
| "learning_rate": 1.986258588382261e-05, |
| "loss": 0.3375, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.9291736930860033, |
| "grad_norm": 0.24925738519535062, |
| "learning_rate": 1.9831355402873205e-05, |
| "loss": 0.3752, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.930860033726813, |
| "grad_norm": 0.2573957501588105, |
| "learning_rate": 1.98001249219238e-05, |
| "loss": 0.3623, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.932546374367622, |
| "grad_norm": 0.25691872404772453, |
| "learning_rate": 1.9768894440974392e-05, |
| "loss": 0.3694, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.9342327150084317, |
| "grad_norm": 0.28696277976052026, |
| "learning_rate": 1.9737663960024985e-05, |
| "loss": 0.3537, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.9359190556492412, |
| "grad_norm": 0.25620992572730106, |
| "learning_rate": 1.970643347907558e-05, |
| "loss": 0.3666, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.9376053962900506, |
| "grad_norm": 0.25427258057854996, |
| "learning_rate": 1.9675202998126173e-05, |
| "loss": 0.3456, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.93929173693086, |
| "grad_norm": 0.2359437378592525, |
| "learning_rate": 1.9643972517176766e-05, |
| "loss": 0.3527, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.9409780775716694, |
| "grad_norm": 0.24853620134111262, |
| "learning_rate": 1.9612742036227356e-05, |
| "loss": 0.3539, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.942664418212479, |
| "grad_norm": 0.24474109975464906, |
| "learning_rate": 1.958151155527795e-05, |
| "loss": 0.357, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.9443507588532882, |
| "grad_norm": 0.25479373707248376, |
| "learning_rate": 1.9550281074328547e-05, |
| "loss": 0.3741, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.9460370994940979, |
| "grad_norm": 0.22999459116070575, |
| "learning_rate": 1.951905059337914e-05, |
| "loss": 0.3458, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.9477234401349073, |
| "grad_norm": 0.2818946765325796, |
| "learning_rate": 1.9487820112429734e-05, |
| "loss": 0.3597, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.9494097807757167, |
| "grad_norm": 0.2520481240185905, |
| "learning_rate": 1.9456589631480327e-05, |
| "loss": 0.3608, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.951096121416526, |
| "grad_norm": 0.2732606635337268, |
| "learning_rate": 1.942535915053092e-05, |
| "loss": 0.3732, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.9527824620573355, |
| "grad_norm": 0.3059595096865597, |
| "learning_rate": 1.939412866958151e-05, |
| "loss": 0.3598, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.9544688026981452, |
| "grad_norm": 0.26107842993336405, |
| "learning_rate": 1.9362898188632105e-05, |
| "loss": 0.3595, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.9561551433389543, |
| "grad_norm": 0.26997037532654433, |
| "learning_rate": 1.9331667707682698e-05, |
| "loss": 0.3556, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.957841483979764, |
| "grad_norm": 0.29676884514372204, |
| "learning_rate": 1.9300437226733292e-05, |
| "loss": 0.3637, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.9595278246205734, |
| "grad_norm": 0.2891548777760464, |
| "learning_rate": 1.9269206745783885e-05, |
| "loss": 0.3643, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.9612141652613828, |
| "grad_norm": 0.3111146178069649, |
| "learning_rate": 1.923797626483448e-05, |
| "loss": 0.3469, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.9629005059021922, |
| "grad_norm": 0.290593840910139, |
| "learning_rate": 1.9206745783885072e-05, |
| "loss": 0.3646, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.9645868465430016, |
| "grad_norm": 0.30031395432300506, |
| "learning_rate": 1.9175515302935666e-05, |
| "loss": 0.3709, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.9662731871838113, |
| "grad_norm": 0.28471179249901485, |
| "learning_rate": 1.914428482198626e-05, |
| "loss": 0.382, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.9679595278246205, |
| "grad_norm": 0.2725996893631143, |
| "learning_rate": 1.9113054341036853e-05, |
| "loss": 0.3798, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.96964586846543, |
| "grad_norm": 0.33070917660443727, |
| "learning_rate": 1.9081823860087447e-05, |
| "loss": 0.3612, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.9713322091062393, |
| "grad_norm": 0.27273598100922003, |
| "learning_rate": 1.905059337913804e-05, |
| "loss": 0.3516, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.973018549747049, |
| "grad_norm": 0.250304283272736, |
| "learning_rate": 1.9019362898188634e-05, |
| "loss": 0.3628, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.9747048903878583, |
| "grad_norm": 0.25447789264283865, |
| "learning_rate": 1.8988132417239227e-05, |
| "loss": 0.3427, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.9763912310286678, |
| "grad_norm": 0.26147916698816104, |
| "learning_rate": 1.895690193628982e-05, |
| "loss": 0.3609, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.9780775716694774, |
| "grad_norm": 0.84319269153612, |
| "learning_rate": 1.8925671455340414e-05, |
| "loss": 0.3658, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.9797639123102866, |
| "grad_norm": 0.24524151384565965, |
| "learning_rate": 1.8894440974391004e-05, |
| "loss": 0.3469, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.9814502529510962, |
| "grad_norm": 0.27630607979162247, |
| "learning_rate": 1.8863210493441598e-05, |
| "loss": 0.3799, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.9831365935919054, |
| "grad_norm": 0.3062796051194247, |
| "learning_rate": 1.883198001249219e-05, |
| "loss": 0.3804, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.984822934232715, |
| "grad_norm": 0.23663568250522882, |
| "learning_rate": 1.8800749531542785e-05, |
| "loss": 0.3579, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.9865092748735245, |
| "grad_norm": 0.2839785085773881, |
| "learning_rate": 1.876951905059338e-05, |
| "loss": 0.3728, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.9881956155143339, |
| "grad_norm": 0.24299912642691168, |
| "learning_rate": 1.8738288569643976e-05, |
| "loss": 0.3531, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.9898819561551433, |
| "grad_norm": 0.2848817170382811, |
| "learning_rate": 1.870705808869457e-05, |
| "loss": 0.3639, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.9915682967959527, |
| "grad_norm": 0.2735374219071693, |
| "learning_rate": 1.8675827607745163e-05, |
| "loss": 0.3725, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.9932546374367623, |
| "grad_norm": 0.26362036054219906, |
| "learning_rate": 1.8644597126795753e-05, |
| "loss": 0.3523, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.9949409780775715, |
| "grad_norm": 0.2781904905926228, |
| "learning_rate": 1.8613366645846346e-05, |
| "loss": 0.3632, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.9966273187183812, |
| "grad_norm": 0.28555430611153043, |
| "learning_rate": 1.858213616489694e-05, |
| "loss": 0.3423, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.9983136593591906, |
| "grad_norm": 0.2925206020361668, |
| "learning_rate": 1.8550905683947534e-05, |
| "loss": 0.3602, |
| "step": 1185 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.2906446874370902, |
| "learning_rate": 1.8519675202998127e-05, |
| "loss": 0.3512, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.0016863406408096, |
| "grad_norm": 0.3307751271667645, |
| "learning_rate": 1.848844472204872e-05, |
| "loss": 0.2956, |
| "step": 1187 |
| }, |
| { |
| "epoch": 2.003372681281619, |
| "grad_norm": 0.29575692997643666, |
| "learning_rate": 1.8457214241099314e-05, |
| "loss": 0.2939, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.0050590219224285, |
| "grad_norm": 0.369446717074265, |
| "learning_rate": 1.8425983760149908e-05, |
| "loss": 0.2965, |
| "step": 1189 |
| }, |
| { |
| "epoch": 2.0067453625632377, |
| "grad_norm": 0.30301462667295276, |
| "learning_rate": 1.8394753279200498e-05, |
| "loss": 0.291, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.0084317032040473, |
| "grad_norm": 0.3316839260298742, |
| "learning_rate": 1.836352279825109e-05, |
| "loss": 0.2778, |
| "step": 1191 |
| }, |
| { |
| "epoch": 2.0101180438448565, |
| "grad_norm": 0.3335466364498099, |
| "learning_rate": 1.833229231730169e-05, |
| "loss": 0.2955, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.011804384485666, |
| "grad_norm": 0.28377162554836055, |
| "learning_rate": 1.8301061836352282e-05, |
| "loss": 0.281, |
| "step": 1193 |
| }, |
| { |
| "epoch": 2.0134907251264758, |
| "grad_norm": 0.3022140584533395, |
| "learning_rate": 1.8269831355402875e-05, |
| "loss": 0.2903, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.015177065767285, |
| "grad_norm": 0.29880763730980237, |
| "learning_rate": 1.823860087445347e-05, |
| "loss": 0.2836, |
| "step": 1195 |
| }, |
| { |
| "epoch": 2.0168634064080946, |
| "grad_norm": 0.2793211888777449, |
| "learning_rate": 1.8207370393504063e-05, |
| "loss": 0.2804, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.0185497470489038, |
| "grad_norm": 0.2791483101159237, |
| "learning_rate": 1.8176139912554656e-05, |
| "loss": 0.2851, |
| "step": 1197 |
| }, |
| { |
| "epoch": 2.0202360876897134, |
| "grad_norm": 0.28203865877543277, |
| "learning_rate": 1.8144909431605246e-05, |
| "loss": 0.2897, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.0219224283305226, |
| "grad_norm": 0.30036531151809387, |
| "learning_rate": 1.811367895065584e-05, |
| "loss": 0.2816, |
| "step": 1199 |
| }, |
| { |
| "epoch": 2.0236087689713322, |
| "grad_norm": 0.26783802520337824, |
| "learning_rate": 1.8082448469706433e-05, |
| "loss": 0.2896, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.0252951096121414, |
| "grad_norm": 0.2727196129087922, |
| "learning_rate": 1.8051217988757027e-05, |
| "loss": 0.2914, |
| "step": 1201 |
| }, |
| { |
| "epoch": 2.026981450252951, |
| "grad_norm": 0.27693857289448, |
| "learning_rate": 1.801998750780762e-05, |
| "loss": 0.2905, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.0286677908937607, |
| "grad_norm": 0.2439937354147468, |
| "learning_rate": 1.7988757026858214e-05, |
| "loss": 0.2808, |
| "step": 1203 |
| }, |
| { |
| "epoch": 2.03035413153457, |
| "grad_norm": 0.25874712164329317, |
| "learning_rate": 1.7957526545908808e-05, |
| "loss": 0.2942, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.0320404721753795, |
| "grad_norm": 0.2853597585585357, |
| "learning_rate": 1.79262960649594e-05, |
| "loss": 0.2937, |
| "step": 1205 |
| }, |
| { |
| "epoch": 2.0337268128161887, |
| "grad_norm": 0.27253686049864834, |
| "learning_rate": 1.7895065584009995e-05, |
| "loss": 0.2873, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.0354131534569984, |
| "grad_norm": 0.2590500678500842, |
| "learning_rate": 1.7863835103060588e-05, |
| "loss": 0.2908, |
| "step": 1207 |
| }, |
| { |
| "epoch": 2.0370994940978076, |
| "grad_norm": 0.23581659556462323, |
| "learning_rate": 1.7832604622111182e-05, |
| "loss": 0.2871, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.038785834738617, |
| "grad_norm": 0.26532251878749463, |
| "learning_rate": 1.7801374141161775e-05, |
| "loss": 0.288, |
| "step": 1209 |
| }, |
| { |
| "epoch": 2.040472175379427, |
| "grad_norm": 0.2284343621608884, |
| "learning_rate": 1.777014366021237e-05, |
| "loss": 0.284, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.042158516020236, |
| "grad_norm": 0.27859804430246, |
| "learning_rate": 1.7738913179262962e-05, |
| "loss": 0.2824, |
| "step": 1211 |
| }, |
| { |
| "epoch": 2.0438448566610457, |
| "grad_norm": 0.25913110830168806, |
| "learning_rate": 1.7707682698313556e-05, |
| "loss": 0.3001, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.045531197301855, |
| "grad_norm": 0.26958477440693623, |
| "learning_rate": 1.767645221736415e-05, |
| "loss": 0.2966, |
| "step": 1213 |
| }, |
| { |
| "epoch": 2.0472175379426645, |
| "grad_norm": 0.2658355243895097, |
| "learning_rate": 1.764522173641474e-05, |
| "loss": 0.2803, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.0489038785834737, |
| "grad_norm": 0.23455281497306474, |
| "learning_rate": 1.7613991255465333e-05, |
| "loss": 0.2955, |
| "step": 1215 |
| }, |
| { |
| "epoch": 2.0505902192242833, |
| "grad_norm": 0.23598863217854707, |
| "learning_rate": 1.7582760774515927e-05, |
| "loss": 0.2798, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.052276559865093, |
| "grad_norm": 0.23974716737829616, |
| "learning_rate": 1.755153029356652e-05, |
| "loss": 0.291, |
| "step": 1217 |
| }, |
| { |
| "epoch": 2.053962900505902, |
| "grad_norm": 0.25591362950142704, |
| "learning_rate": 1.7520299812617117e-05, |
| "loss": 0.2941, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.0556492411467118, |
| "grad_norm": 0.24126270893781737, |
| "learning_rate": 1.748906933166771e-05, |
| "loss": 0.296, |
| "step": 1219 |
| }, |
| { |
| "epoch": 2.057335581787521, |
| "grad_norm": 0.24592681706462488, |
| "learning_rate": 1.7457838850718304e-05, |
| "loss": 0.2922, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.0590219224283306, |
| "grad_norm": 0.2493972413820514, |
| "learning_rate": 1.7426608369768894e-05, |
| "loss": 0.2934, |
| "step": 1221 |
| }, |
| { |
| "epoch": 2.06070826306914, |
| "grad_norm": 0.2240178832747126, |
| "learning_rate": 1.7395377888819488e-05, |
| "loss": 0.2772, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.0623946037099494, |
| "grad_norm": 0.24194577851657278, |
| "learning_rate": 1.736414740787008e-05, |
| "loss": 0.2917, |
| "step": 1223 |
| }, |
| { |
| "epoch": 2.064080944350759, |
| "grad_norm": 0.250309298786314, |
| "learning_rate": 1.7332916926920675e-05, |
| "loss": 0.2945, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.0657672849915683, |
| "grad_norm": 0.21954443072464697, |
| "learning_rate": 1.730168644597127e-05, |
| "loss": 0.2683, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.067453625632378, |
| "grad_norm": 0.24070761070270455, |
| "learning_rate": 1.7270455965021862e-05, |
| "loss": 0.2898, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.069139966273187, |
| "grad_norm": 0.23195736444889098, |
| "learning_rate": 1.7239225484072456e-05, |
| "loss": 0.2743, |
| "step": 1227 |
| }, |
| { |
| "epoch": 2.0708263069139967, |
| "grad_norm": 0.2401183451917892, |
| "learning_rate": 1.720799500312305e-05, |
| "loss": 0.2742, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.072512647554806, |
| "grad_norm": 0.22886720556268084, |
| "learning_rate": 1.717676452217364e-05, |
| "loss": 0.2714, |
| "step": 1229 |
| }, |
| { |
| "epoch": 2.0741989881956155, |
| "grad_norm": 0.22005987910735308, |
| "learning_rate": 1.7145534041224233e-05, |
| "loss": 0.2799, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.075885328836425, |
| "grad_norm": 0.23405270800317268, |
| "learning_rate": 1.711430356027483e-05, |
| "loss": 0.2763, |
| "step": 1231 |
| }, |
| { |
| "epoch": 2.0775716694772344, |
| "grad_norm": 0.22390802443877075, |
| "learning_rate": 1.7083073079325424e-05, |
| "loss": 0.2782, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.079258010118044, |
| "grad_norm": 0.2233723580929941, |
| "learning_rate": 1.7051842598376017e-05, |
| "loss": 0.294, |
| "step": 1233 |
| }, |
| { |
| "epoch": 2.080944350758853, |
| "grad_norm": 0.24116001791573663, |
| "learning_rate": 1.702061211742661e-05, |
| "loss": 0.277, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.082630691399663, |
| "grad_norm": 0.23973427379224568, |
| "learning_rate": 1.6989381636477204e-05, |
| "loss": 0.2799, |
| "step": 1235 |
| }, |
| { |
| "epoch": 2.084317032040472, |
| "grad_norm": 0.25867200752208974, |
| "learning_rate": 1.6958151155527798e-05, |
| "loss": 0.2887, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.0860033726812817, |
| "grad_norm": 0.21859934229938208, |
| "learning_rate": 1.6926920674578388e-05, |
| "loss": 0.2839, |
| "step": 1237 |
| }, |
| { |
| "epoch": 2.087689713322091, |
| "grad_norm": 0.23071892456929038, |
| "learning_rate": 1.689569019362898e-05, |
| "loss": 0.281, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.0893760539629005, |
| "grad_norm": 0.24681673388720834, |
| "learning_rate": 1.6864459712679575e-05, |
| "loss": 0.2891, |
| "step": 1239 |
| }, |
| { |
| "epoch": 2.09106239460371, |
| "grad_norm": 0.22203763739533036, |
| "learning_rate": 1.683322923173017e-05, |
| "loss": 0.2835, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.0927487352445193, |
| "grad_norm": 0.23625461690323366, |
| "learning_rate": 1.6801998750780762e-05, |
| "loss": 0.2882, |
| "step": 1241 |
| }, |
| { |
| "epoch": 2.094435075885329, |
| "grad_norm": 0.2495043900664838, |
| "learning_rate": 1.6770768269831356e-05, |
| "loss": 0.2866, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.096121416526138, |
| "grad_norm": 0.24755254920805522, |
| "learning_rate": 1.673953778888195e-05, |
| "loss": 0.3068, |
| "step": 1243 |
| }, |
| { |
| "epoch": 2.097807757166948, |
| "grad_norm": 0.2321369024402958, |
| "learning_rate": 1.6708307307932543e-05, |
| "loss": 0.2851, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.099494097807757, |
| "grad_norm": 0.23341761835203848, |
| "learning_rate": 1.6677076826983136e-05, |
| "loss": 0.2774, |
| "step": 1245 |
| }, |
| { |
| "epoch": 2.1011804384485666, |
| "grad_norm": 0.228643496609932, |
| "learning_rate": 1.664584634603373e-05, |
| "loss": 0.2871, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.1028667790893762, |
| "grad_norm": 0.24199355513800766, |
| "learning_rate": 1.6614615865084323e-05, |
| "loss": 0.2923, |
| "step": 1247 |
| }, |
| { |
| "epoch": 2.1045531197301854, |
| "grad_norm": 0.23947463740759362, |
| "learning_rate": 1.6583385384134917e-05, |
| "loss": 0.2841, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.106239460370995, |
| "grad_norm": 0.2443776609037523, |
| "learning_rate": 1.655215490318551e-05, |
| "loss": 0.2863, |
| "step": 1249 |
| }, |
| { |
| "epoch": 2.1079258010118043, |
| "grad_norm": 0.23349457936994056, |
| "learning_rate": 1.6520924422236104e-05, |
| "loss": 0.2767, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.109612141652614, |
| "grad_norm": 0.2204903592956088, |
| "learning_rate": 1.6489693941286698e-05, |
| "loss": 0.2741, |
| "step": 1251 |
| }, |
| { |
| "epoch": 2.111298482293423, |
| "grad_norm": 0.24727353017816828, |
| "learning_rate": 1.645846346033729e-05, |
| "loss": 0.2812, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.1129848229342327, |
| "grad_norm": 0.2311319506145901, |
| "learning_rate": 1.642723297938788e-05, |
| "loss": 0.2799, |
| "step": 1253 |
| }, |
| { |
| "epoch": 2.1146711635750424, |
| "grad_norm": 0.2368796716322885, |
| "learning_rate": 1.6396002498438475e-05, |
| "loss": 0.2872, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.1163575042158516, |
| "grad_norm": 0.22458704529716217, |
| "learning_rate": 1.636477201748907e-05, |
| "loss": 0.2833, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.118043844856661, |
| "grad_norm": 0.2257545808978752, |
| "learning_rate": 1.6333541536539662e-05, |
| "loss": 0.2781, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.1197301854974704, |
| "grad_norm": 0.217874801326409, |
| "learning_rate": 1.630231105559026e-05, |
| "loss": 0.2805, |
| "step": 1257 |
| }, |
| { |
| "epoch": 2.12141652613828, |
| "grad_norm": 0.23071381152166529, |
| "learning_rate": 1.6271080574640852e-05, |
| "loss": 0.2798, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.123102866779089, |
| "grad_norm": 0.21589917060864472, |
| "learning_rate": 1.6239850093691446e-05, |
| "loss": 0.287, |
| "step": 1259 |
| }, |
| { |
| "epoch": 2.124789207419899, |
| "grad_norm": 0.22233422998925415, |
| "learning_rate": 1.6208619612742036e-05, |
| "loss": 0.2918, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.126475548060708, |
| "grad_norm": 0.2253005585037101, |
| "learning_rate": 1.617738913179263e-05, |
| "loss": 0.2855, |
| "step": 1261 |
| }, |
| { |
| "epoch": 2.1281618887015177, |
| "grad_norm": 0.24525887817635011, |
| "learning_rate": 1.6146158650843223e-05, |
| "loss": 0.3026, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.1298482293423273, |
| "grad_norm": 0.2311288754571125, |
| "learning_rate": 1.6114928169893817e-05, |
| "loss": 0.2919, |
| "step": 1263 |
| }, |
| { |
| "epoch": 2.1315345699831365, |
| "grad_norm": 0.25357016877593264, |
| "learning_rate": 1.608369768894441e-05, |
| "loss": 0.2793, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.133220910623946, |
| "grad_norm": 0.24402417920686248, |
| "learning_rate": 1.6052467207995004e-05, |
| "loss": 0.2729, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.1349072512647553, |
| "grad_norm": 0.24043061619632167, |
| "learning_rate": 1.6021236727045597e-05, |
| "loss": 0.2812, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.136593591905565, |
| "grad_norm": 0.2256502846211635, |
| "learning_rate": 1.599000624609619e-05, |
| "loss": 0.2785, |
| "step": 1267 |
| }, |
| { |
| "epoch": 2.138279932546374, |
| "grad_norm": 0.20616813865116917, |
| "learning_rate": 1.5958775765146785e-05, |
| "loss": 0.27, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.139966273187184, |
| "grad_norm": 0.2578343792815402, |
| "learning_rate": 1.5927545284197375e-05, |
| "loss": 0.2919, |
| "step": 1269 |
| }, |
| { |
| "epoch": 2.1416526138279934, |
| "grad_norm": 0.24824972542043958, |
| "learning_rate": 1.589631480324797e-05, |
| "loss": 0.2861, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.1433389544688026, |
| "grad_norm": 0.24683021472584324, |
| "learning_rate": 1.5865084322298565e-05, |
| "loss": 0.2855, |
| "step": 1271 |
| }, |
| { |
| "epoch": 2.1450252951096123, |
| "grad_norm": 0.23061775902190973, |
| "learning_rate": 1.583385384134916e-05, |
| "loss": 0.2821, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.1467116357504215, |
| "grad_norm": 0.25513089040919573, |
| "learning_rate": 1.5802623360399752e-05, |
| "loss": 0.277, |
| "step": 1273 |
| }, |
| { |
| "epoch": 2.148397976391231, |
| "grad_norm": 0.24772256510306015, |
| "learning_rate": 1.5771392879450346e-05, |
| "loss": 0.2743, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.1500843170320403, |
| "grad_norm": 0.2377207233659539, |
| "learning_rate": 1.574016239850094e-05, |
| "loss": 0.2768, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.15177065767285, |
| "grad_norm": 0.2130979567502923, |
| "learning_rate": 1.570893191755153e-05, |
| "loss": 0.2754, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.1534569983136596, |
| "grad_norm": 0.262105795530253, |
| "learning_rate": 1.5677701436602123e-05, |
| "loss": 0.2814, |
| "step": 1277 |
| }, |
| { |
| "epoch": 2.1551433389544687, |
| "grad_norm": 0.25109311323666383, |
| "learning_rate": 1.5646470955652717e-05, |
| "loss": 0.2765, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.1568296795952784, |
| "grad_norm": 0.22712313240141768, |
| "learning_rate": 1.561524047470331e-05, |
| "loss": 0.2848, |
| "step": 1279 |
| }, |
| { |
| "epoch": 2.1585160202360876, |
| "grad_norm": 0.28309648201217064, |
| "learning_rate": 1.5584009993753904e-05, |
| "loss": 0.296, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.160202360876897, |
| "grad_norm": 0.2259925274682846, |
| "learning_rate": 1.5552779512804497e-05, |
| "loss": 0.274, |
| "step": 1281 |
| }, |
| { |
| "epoch": 2.1618887015177064, |
| "grad_norm": 0.2576654720712196, |
| "learning_rate": 1.5521549031855094e-05, |
| "loss": 0.2802, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.163575042158516, |
| "grad_norm": 0.24876972465327207, |
| "learning_rate": 1.5490318550905684e-05, |
| "loss": 0.2752, |
| "step": 1283 |
| }, |
| { |
| "epoch": 2.1652613827993257, |
| "grad_norm": 0.27479181196325325, |
| "learning_rate": 1.5459088069956278e-05, |
| "loss": 0.2995, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.166947723440135, |
| "grad_norm": 0.2777973590363955, |
| "learning_rate": 1.542785758900687e-05, |
| "loss": 0.2905, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.1686340640809445, |
| "grad_norm": 0.21470741412226269, |
| "learning_rate": 1.5396627108057465e-05, |
| "loss": 0.2806, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.1703204047217537, |
| "grad_norm": 0.25995545738255027, |
| "learning_rate": 1.536539662710806e-05, |
| "loss": 0.2783, |
| "step": 1287 |
| }, |
| { |
| "epoch": 2.1720067453625633, |
| "grad_norm": 0.26866950347821744, |
| "learning_rate": 1.5334166146158652e-05, |
| "loss": 0.2958, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.1736930860033725, |
| "grad_norm": 0.22667179523843922, |
| "learning_rate": 1.5302935665209246e-05, |
| "loss": 0.2925, |
| "step": 1289 |
| }, |
| { |
| "epoch": 2.175379426644182, |
| "grad_norm": 0.2524094258477164, |
| "learning_rate": 1.527170518425984e-05, |
| "loss": 0.2899, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.177065767284992, |
| "grad_norm": 0.25430523574145514, |
| "learning_rate": 1.5240474703310431e-05, |
| "loss": 0.2978, |
| "step": 1291 |
| }, |
| { |
| "epoch": 2.178752107925801, |
| "grad_norm": 0.23658111810584043, |
| "learning_rate": 1.5209244222361025e-05, |
| "loss": 0.2861, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.1804384485666106, |
| "grad_norm": 0.23263458485765506, |
| "learning_rate": 1.5178013741411618e-05, |
| "loss": 0.2943, |
| "step": 1293 |
| }, |
| { |
| "epoch": 2.18212478920742, |
| "grad_norm": 0.23038244450450082, |
| "learning_rate": 1.514678326046221e-05, |
| "loss": 0.2753, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.1838111298482294, |
| "grad_norm": 0.26875956119847794, |
| "learning_rate": 1.5115552779512807e-05, |
| "loss": 0.2835, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.1854974704890386, |
| "grad_norm": 0.23121532536402334, |
| "learning_rate": 1.50843222985634e-05, |
| "loss": 0.2736, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.1871838111298483, |
| "grad_norm": 0.21410903606747092, |
| "learning_rate": 1.5053091817613992e-05, |
| "loss": 0.2865, |
| "step": 1297 |
| }, |
| { |
| "epoch": 2.1888701517706575, |
| "grad_norm": 0.2332520113659498, |
| "learning_rate": 1.5021861336664586e-05, |
| "loss": 0.2737, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.190556492411467, |
| "grad_norm": 0.24890045008623024, |
| "learning_rate": 1.499063085571518e-05, |
| "loss": 0.2747, |
| "step": 1299 |
| }, |
| { |
| "epoch": 2.1922428330522767, |
| "grad_norm": 0.24074106694356046, |
| "learning_rate": 1.4959400374765773e-05, |
| "loss": 0.3033, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.193929173693086, |
| "grad_norm": 0.22496522452819792, |
| "learning_rate": 1.4928169893816365e-05, |
| "loss": 0.2698, |
| "step": 1301 |
| }, |
| { |
| "epoch": 2.1956155143338956, |
| "grad_norm": 0.24173115918945443, |
| "learning_rate": 1.4896939412866958e-05, |
| "loss": 0.2647, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.1973018549747048, |
| "grad_norm": 0.24597057276067655, |
| "learning_rate": 1.4865708931917552e-05, |
| "loss": 0.2863, |
| "step": 1303 |
| }, |
| { |
| "epoch": 2.1989881956155144, |
| "grad_norm": 0.2170673774515546, |
| "learning_rate": 1.4834478450968146e-05, |
| "loss": 0.2671, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.2006745362563236, |
| "grad_norm": 0.22410705790302243, |
| "learning_rate": 1.4803247970018737e-05, |
| "loss": 0.2897, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.2023608768971332, |
| "grad_norm": 0.2723128596611122, |
| "learning_rate": 1.4772017489069331e-05, |
| "loss": 0.2856, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.204047217537943, |
| "grad_norm": 0.23229362442508056, |
| "learning_rate": 1.4740787008119924e-05, |
| "loss": 0.2793, |
| "step": 1307 |
| }, |
| { |
| "epoch": 2.205733558178752, |
| "grad_norm": 0.21690579052268813, |
| "learning_rate": 1.470955652717052e-05, |
| "loss": 0.2755, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.2074198988195617, |
| "grad_norm": 0.24232075794132246, |
| "learning_rate": 1.4678326046221113e-05, |
| "loss": 0.2866, |
| "step": 1309 |
| }, |
| { |
| "epoch": 2.209106239460371, |
| "grad_norm": 0.24128736334554524, |
| "learning_rate": 1.4647095565271707e-05, |
| "loss": 0.2909, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.2107925801011805, |
| "grad_norm": 0.22279945820892214, |
| "learning_rate": 1.46158650843223e-05, |
| "loss": 0.274, |
| "step": 1311 |
| }, |
| { |
| "epoch": 2.2124789207419897, |
| "grad_norm": 0.22506715307821687, |
| "learning_rate": 1.4584634603372894e-05, |
| "loss": 0.2733, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.2141652613827993, |
| "grad_norm": 0.2671414216519164, |
| "learning_rate": 1.4553404122423486e-05, |
| "loss": 0.2773, |
| "step": 1313 |
| }, |
| { |
| "epoch": 2.2158516020236085, |
| "grad_norm": 0.24159986202299671, |
| "learning_rate": 1.452217364147408e-05, |
| "loss": 0.2854, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.217537942664418, |
| "grad_norm": 0.21206993766665885, |
| "learning_rate": 1.4490943160524673e-05, |
| "loss": 0.2802, |
| "step": 1315 |
| }, |
| { |
| "epoch": 2.219224283305228, |
| "grad_norm": 0.2609251300600537, |
| "learning_rate": 1.4459712679575266e-05, |
| "loss": 0.302, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.220910623946037, |
| "grad_norm": 0.2613884146443943, |
| "learning_rate": 1.4428482198625858e-05, |
| "loss": 0.2858, |
| "step": 1317 |
| }, |
| { |
| "epoch": 2.2225969645868466, |
| "grad_norm": 0.23026770972951804, |
| "learning_rate": 1.4397251717676452e-05, |
| "loss": 0.2901, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.224283305227656, |
| "grad_norm": 0.2644082966922968, |
| "learning_rate": 1.4366021236727045e-05, |
| "loss": 0.2912, |
| "step": 1319 |
| }, |
| { |
| "epoch": 2.2259696458684655, |
| "grad_norm": 0.28137455460025673, |
| "learning_rate": 1.4334790755777639e-05, |
| "loss": 0.2789, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.2276559865092747, |
| "grad_norm": 0.2265306145043108, |
| "learning_rate": 1.4303560274828234e-05, |
| "loss": 0.29, |
| "step": 1321 |
| }, |
| { |
| "epoch": 2.2293423271500843, |
| "grad_norm": 0.22638647931653802, |
| "learning_rate": 1.4272329793878828e-05, |
| "loss": 0.2836, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.231028667790894, |
| "grad_norm": 0.2584632170534727, |
| "learning_rate": 1.4241099312929421e-05, |
| "loss": 0.2882, |
| "step": 1323 |
| }, |
| { |
| "epoch": 2.232715008431703, |
| "grad_norm": 0.24957538711555913, |
| "learning_rate": 1.4209868831980013e-05, |
| "loss": 0.2726, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.2344013490725128, |
| "grad_norm": 0.23246788729694776, |
| "learning_rate": 1.4178638351030607e-05, |
| "loss": 0.292, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.236087689713322, |
| "grad_norm": 0.23914882917315505, |
| "learning_rate": 1.41474078700812e-05, |
| "loss": 0.2918, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.2377740303541316, |
| "grad_norm": 0.2314608760403086, |
| "learning_rate": 1.4116177389131794e-05, |
| "loss": 0.2774, |
| "step": 1327 |
| }, |
| { |
| "epoch": 2.2394603709949408, |
| "grad_norm": 0.2329062705306257, |
| "learning_rate": 1.4084946908182387e-05, |
| "loss": 0.2709, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.2411467116357504, |
| "grad_norm": 0.22203184430747988, |
| "learning_rate": 1.405371642723298e-05, |
| "loss": 0.278, |
| "step": 1329 |
| }, |
| { |
| "epoch": 2.24283305227656, |
| "grad_norm": 0.2447086816439649, |
| "learning_rate": 1.4022485946283573e-05, |
| "loss": 0.2764, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.2445193929173692, |
| "grad_norm": 0.2591556094088965, |
| "learning_rate": 1.3991255465334166e-05, |
| "loss": 0.2801, |
| "step": 1331 |
| }, |
| { |
| "epoch": 2.246205733558179, |
| "grad_norm": 0.23957435468644062, |
| "learning_rate": 1.396002498438476e-05, |
| "loss": 0.2714, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.247892074198988, |
| "grad_norm": 0.2606681194052524, |
| "learning_rate": 1.3928794503435352e-05, |
| "loss": 0.2915, |
| "step": 1333 |
| }, |
| { |
| "epoch": 2.2495784148397977, |
| "grad_norm": 0.23947024763981367, |
| "learning_rate": 1.3897564022485949e-05, |
| "loss": 0.278, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.251264755480607, |
| "grad_norm": 0.25887410797765165, |
| "learning_rate": 1.3866333541536542e-05, |
| "loss": 0.2883, |
| "step": 1335 |
| }, |
| { |
| "epoch": 2.2529510961214165, |
| "grad_norm": 0.24241816096972651, |
| "learning_rate": 1.3835103060587134e-05, |
| "loss": 0.2702, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.254637436762226, |
| "grad_norm": 0.23752929575738543, |
| "learning_rate": 1.3803872579637728e-05, |
| "loss": 0.3026, |
| "step": 1337 |
| }, |
| { |
| "epoch": 2.2563237774030354, |
| "grad_norm": 0.22243933683755185, |
| "learning_rate": 1.3772642098688321e-05, |
| "loss": 0.287, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.258010118043845, |
| "grad_norm": 0.22715493640118753, |
| "learning_rate": 1.3741411617738915e-05, |
| "loss": 0.2833, |
| "step": 1339 |
| }, |
| { |
| "epoch": 2.259696458684654, |
| "grad_norm": 0.2301390684777108, |
| "learning_rate": 1.3710181136789507e-05, |
| "loss": 0.2735, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.261382799325464, |
| "grad_norm": 0.20966361714580345, |
| "learning_rate": 1.36789506558401e-05, |
| "loss": 0.2751, |
| "step": 1341 |
| }, |
| { |
| "epoch": 2.263069139966273, |
| "grad_norm": 0.24196298157506335, |
| "learning_rate": 1.3647720174890694e-05, |
| "loss": 0.2802, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.2647554806070826, |
| "grad_norm": 0.22273345922078996, |
| "learning_rate": 1.3616489693941287e-05, |
| "loss": 0.2718, |
| "step": 1343 |
| }, |
| { |
| "epoch": 2.2664418212478923, |
| "grad_norm": 0.23365684879187829, |
| "learning_rate": 1.358525921299188e-05, |
| "loss": 0.2858, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.2681281618887015, |
| "grad_norm": 0.23424780996142444, |
| "learning_rate": 1.3554028732042473e-05, |
| "loss": 0.2658, |
| "step": 1345 |
| }, |
| { |
| "epoch": 2.269814502529511, |
| "grad_norm": 0.25740201871247437, |
| "learning_rate": 1.3522798251093066e-05, |
| "loss": 0.2872, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.2715008431703203, |
| "grad_norm": 0.2397190411229239, |
| "learning_rate": 1.3491567770143661e-05, |
| "loss": 0.2882, |
| "step": 1347 |
| }, |
| { |
| "epoch": 2.27318718381113, |
| "grad_norm": 0.2683555480791064, |
| "learning_rate": 1.3460337289194255e-05, |
| "loss": 0.2776, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.274873524451939, |
| "grad_norm": 0.2860216494530701, |
| "learning_rate": 1.3429106808244848e-05, |
| "loss": 0.2994, |
| "step": 1349 |
| }, |
| { |
| "epoch": 2.2765598650927488, |
| "grad_norm": 0.23203762775591186, |
| "learning_rate": 1.3397876327295442e-05, |
| "loss": 0.2922, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.2782462057335584, |
| "grad_norm": 0.25415952020742033, |
| "learning_rate": 1.3366645846346036e-05, |
| "loss": 0.2916, |
| "step": 1351 |
| }, |
| { |
| "epoch": 2.2799325463743676, |
| "grad_norm": 0.2155270459840728, |
| "learning_rate": 1.3335415365396627e-05, |
| "loss": 0.2738, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.2816188870151772, |
| "grad_norm": 0.21663505940255026, |
| "learning_rate": 1.3304184884447221e-05, |
| "loss": 0.2794, |
| "step": 1353 |
| }, |
| { |
| "epoch": 2.2833052276559864, |
| "grad_norm": 0.23570468928328556, |
| "learning_rate": 1.3272954403497814e-05, |
| "loss": 0.3047, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.284991568296796, |
| "grad_norm": 0.22355120229741432, |
| "learning_rate": 1.3241723922548408e-05, |
| "loss": 0.2942, |
| "step": 1355 |
| }, |
| { |
| "epoch": 2.2866779089376053, |
| "grad_norm": 0.22909057483377146, |
| "learning_rate": 1.3210493441599e-05, |
| "loss": 0.291, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.288364249578415, |
| "grad_norm": 0.2397792113373173, |
| "learning_rate": 1.3179262960649593e-05, |
| "loss": 0.2953, |
| "step": 1357 |
| }, |
| { |
| "epoch": 2.2900505902192245, |
| "grad_norm": 0.22734437317882458, |
| "learning_rate": 1.3148032479700187e-05, |
| "loss": 0.2887, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.2917369308600337, |
| "grad_norm": 0.22738598098791915, |
| "learning_rate": 1.311680199875078e-05, |
| "loss": 0.296, |
| "step": 1359 |
| }, |
| { |
| "epoch": 2.2934232715008434, |
| "grad_norm": 0.21592690968927647, |
| "learning_rate": 1.3085571517801376e-05, |
| "loss": 0.2842, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.2951096121416525, |
| "grad_norm": 0.23330931583642653, |
| "learning_rate": 1.305434103685197e-05, |
| "loss": 0.2861, |
| "step": 1361 |
| }, |
| { |
| "epoch": 2.296795952782462, |
| "grad_norm": 0.24244296287404996, |
| "learning_rate": 1.3023110555902563e-05, |
| "loss": 0.2807, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.2984822934232714, |
| "grad_norm": 0.2241173805399546, |
| "learning_rate": 1.2991880074953155e-05, |
| "loss": 0.2833, |
| "step": 1363 |
| }, |
| { |
| "epoch": 2.300168634064081, |
| "grad_norm": 0.23286405254784961, |
| "learning_rate": 1.2960649594003748e-05, |
| "loss": 0.2693, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.30185497470489, |
| "grad_norm": 0.24770154492130161, |
| "learning_rate": 1.2929419113054342e-05, |
| "loss": 0.2718, |
| "step": 1365 |
| }, |
| { |
| "epoch": 2.3035413153457, |
| "grad_norm": 0.2086407055272546, |
| "learning_rate": 1.2898188632104935e-05, |
| "loss": 0.2707, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.305227655986509, |
| "grad_norm": 0.24433979227348487, |
| "learning_rate": 1.2866958151155529e-05, |
| "loss": 0.2855, |
| "step": 1367 |
| }, |
| { |
| "epoch": 2.3069139966273187, |
| "grad_norm": 0.25872917443082816, |
| "learning_rate": 1.283572767020612e-05, |
| "loss": 0.2847, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.3086003372681283, |
| "grad_norm": 0.22534225398839433, |
| "learning_rate": 1.2804497189256714e-05, |
| "loss": 0.28, |
| "step": 1369 |
| }, |
| { |
| "epoch": 2.3102866779089375, |
| "grad_norm": 0.24181635268820018, |
| "learning_rate": 1.2773266708307308e-05, |
| "loss": 0.3136, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.311973018549747, |
| "grad_norm": 0.2349690814589048, |
| "learning_rate": 1.2742036227357901e-05, |
| "loss": 0.2788, |
| "step": 1371 |
| }, |
| { |
| "epoch": 2.3136593591905563, |
| "grad_norm": 0.23437618515026656, |
| "learning_rate": 1.2710805746408493e-05, |
| "loss": 0.289, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.315345699831366, |
| "grad_norm": 0.22857198837900544, |
| "learning_rate": 1.267957526545909e-05, |
| "loss": 0.2933, |
| "step": 1373 |
| }, |
| { |
| "epoch": 2.317032040472175, |
| "grad_norm": 0.23374151297523482, |
| "learning_rate": 1.2648344784509684e-05, |
| "loss": 0.2921, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.318718381112985, |
| "grad_norm": 0.2272494901135597, |
| "learning_rate": 1.2617114303560276e-05, |
| "loss": 0.2843, |
| "step": 1375 |
| }, |
| { |
| "epoch": 2.3204047217537944, |
| "grad_norm": 0.22957991022868246, |
| "learning_rate": 1.258588382261087e-05, |
| "loss": 0.2816, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.3220910623946036, |
| "grad_norm": 0.22712652918413673, |
| "learning_rate": 1.2554653341661463e-05, |
| "loss": 0.2946, |
| "step": 1377 |
| }, |
| { |
| "epoch": 2.3237774030354132, |
| "grad_norm": 0.20391612529685307, |
| "learning_rate": 1.2523422860712056e-05, |
| "loss": 0.277, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.3254637436762224, |
| "grad_norm": 0.2507935893570099, |
| "learning_rate": 1.2492192379762648e-05, |
| "loss": 0.2916, |
| "step": 1379 |
| }, |
| { |
| "epoch": 2.327150084317032, |
| "grad_norm": 0.2170341726977326, |
| "learning_rate": 1.2460961898813242e-05, |
| "loss": 0.2791, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.3288364249578413, |
| "grad_norm": 0.21290499300263288, |
| "learning_rate": 1.2429731417863835e-05, |
| "loss": 0.3004, |
| "step": 1381 |
| }, |
| { |
| "epoch": 2.330522765598651, |
| "grad_norm": 0.2386289179226274, |
| "learning_rate": 1.239850093691443e-05, |
| "loss": 0.2883, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.3322091062394605, |
| "grad_norm": 0.24106374267540046, |
| "learning_rate": 1.2367270455965022e-05, |
| "loss": 0.2976, |
| "step": 1383 |
| }, |
| { |
| "epoch": 2.3338954468802697, |
| "grad_norm": 0.24279979431421234, |
| "learning_rate": 1.2336039975015616e-05, |
| "loss": 0.2956, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.3355817875210794, |
| "grad_norm": 0.2415298045824607, |
| "learning_rate": 1.230480949406621e-05, |
| "loss": 0.2971, |
| "step": 1385 |
| }, |
| { |
| "epoch": 2.3372681281618886, |
| "grad_norm": 0.22791130385805178, |
| "learning_rate": 1.2273579013116803e-05, |
| "loss": 0.283, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.338954468802698, |
| "grad_norm": 0.22723222459345518, |
| "learning_rate": 1.2242348532167395e-05, |
| "loss": 0.2795, |
| "step": 1387 |
| }, |
| { |
| "epoch": 2.3406408094435074, |
| "grad_norm": 0.24264089151335377, |
| "learning_rate": 1.2211118051217988e-05, |
| "loss": 0.2937, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.342327150084317, |
| "grad_norm": 0.24686234338788215, |
| "learning_rate": 1.2179887570268584e-05, |
| "loss": 0.289, |
| "step": 1389 |
| }, |
| { |
| "epoch": 2.3440134907251267, |
| "grad_norm": 0.21992049757462978, |
| "learning_rate": 1.2148657089319177e-05, |
| "loss": 0.2986, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.345699831365936, |
| "grad_norm": 0.19803695791646844, |
| "learning_rate": 1.2117426608369769e-05, |
| "loss": 0.2857, |
| "step": 1391 |
| }, |
| { |
| "epoch": 2.3473861720067455, |
| "grad_norm": 0.23900469154795304, |
| "learning_rate": 1.2086196127420363e-05, |
| "loss": 0.2819, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.3490725126475547, |
| "grad_norm": 0.21287079879552834, |
| "learning_rate": 1.2054965646470956e-05, |
| "loss": 0.2797, |
| "step": 1393 |
| }, |
| { |
| "epoch": 2.3507588532883643, |
| "grad_norm": 0.2210244546656982, |
| "learning_rate": 1.202373516552155e-05, |
| "loss": 0.2932, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.3524451939291735, |
| "grad_norm": 0.2048304713681441, |
| "learning_rate": 1.1992504684572143e-05, |
| "loss": 0.2724, |
| "step": 1395 |
| }, |
| { |
| "epoch": 2.354131534569983, |
| "grad_norm": 0.25839090758710703, |
| "learning_rate": 1.1961274203622737e-05, |
| "loss": 0.2905, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.3558178752107928, |
| "grad_norm": 0.2281410884233869, |
| "learning_rate": 1.193004372267333e-05, |
| "loss": 0.2838, |
| "step": 1397 |
| }, |
| { |
| "epoch": 2.357504215851602, |
| "grad_norm": 0.21845257383012132, |
| "learning_rate": 1.1898813241723924e-05, |
| "loss": 0.2792, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.3591905564924116, |
| "grad_norm": 0.2618765844226696, |
| "learning_rate": 1.1867582760774516e-05, |
| "loss": 0.2931, |
| "step": 1399 |
| }, |
| { |
| "epoch": 2.360876897133221, |
| "grad_norm": 0.275866954983335, |
| "learning_rate": 1.183635227982511e-05, |
| "loss": 0.299, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.3625632377740304, |
| "grad_norm": 0.2001624858205199, |
| "learning_rate": 1.1805121798875703e-05, |
| "loss": 0.2737, |
| "step": 1401 |
| }, |
| { |
| "epoch": 2.3642495784148396, |
| "grad_norm": 0.23084935393173628, |
| "learning_rate": 1.1773891317926296e-05, |
| "loss": 0.3037, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.3659359190556493, |
| "grad_norm": 0.2396914920117963, |
| "learning_rate": 1.174266083697689e-05, |
| "loss": 0.304, |
| "step": 1403 |
| }, |
| { |
| "epoch": 2.367622259696459, |
| "grad_norm": 0.22867487444328807, |
| "learning_rate": 1.1711430356027483e-05, |
| "loss": 0.2652, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.369308600337268, |
| "grad_norm": 0.2594652883181782, |
| "learning_rate": 1.1680199875078077e-05, |
| "loss": 0.2914, |
| "step": 1405 |
| }, |
| { |
| "epoch": 2.3709949409780777, |
| "grad_norm": 0.22604681948110886, |
| "learning_rate": 1.164896939412867e-05, |
| "loss": 0.2762, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.372681281618887, |
| "grad_norm": 0.24016465263641243, |
| "learning_rate": 1.1617738913179262e-05, |
| "loss": 0.2726, |
| "step": 1407 |
| }, |
| { |
| "epoch": 2.3743676222596966, |
| "grad_norm": 0.2815087199536877, |
| "learning_rate": 1.1586508432229858e-05, |
| "loss": 0.304, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.3760539629005057, |
| "grad_norm": 0.22537579065518193, |
| "learning_rate": 1.1555277951280451e-05, |
| "loss": 0.2986, |
| "step": 1409 |
| }, |
| { |
| "epoch": 2.3777403035413154, |
| "grad_norm": 0.22030692295872925, |
| "learning_rate": 1.1524047470331043e-05, |
| "loss": 0.2846, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.379426644182125, |
| "grad_norm": 0.2614924581245704, |
| "learning_rate": 1.1492816989381637e-05, |
| "loss": 0.2804, |
| "step": 1411 |
| }, |
| { |
| "epoch": 2.381112984822934, |
| "grad_norm": 0.2525035198240204, |
| "learning_rate": 1.146158650843223e-05, |
| "loss": 0.2931, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.382799325463744, |
| "grad_norm": 0.25978609967948285, |
| "learning_rate": 1.1430356027482824e-05, |
| "loss": 0.2794, |
| "step": 1413 |
| }, |
| { |
| "epoch": 2.384485666104553, |
| "grad_norm": 0.24317234044800626, |
| "learning_rate": 1.1399125546533417e-05, |
| "loss": 0.2933, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.3861720067453627, |
| "grad_norm": 0.2724691738795029, |
| "learning_rate": 1.136789506558401e-05, |
| "loss": 0.2927, |
| "step": 1415 |
| }, |
| { |
| "epoch": 2.387858347386172, |
| "grad_norm": 0.23730128931622077, |
| "learning_rate": 1.1336664584634604e-05, |
| "loss": 0.2829, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.3895446880269815, |
| "grad_norm": 0.25629553877972355, |
| "learning_rate": 1.1305434103685198e-05, |
| "loss": 0.3061, |
| "step": 1417 |
| }, |
| { |
| "epoch": 2.391231028667791, |
| "grad_norm": 0.22474868934736805, |
| "learning_rate": 1.127420362273579e-05, |
| "loss": 0.2797, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.3929173693086003, |
| "grad_norm": 0.23721846546754655, |
| "learning_rate": 1.1242973141786383e-05, |
| "loss": 0.2796, |
| "step": 1419 |
| }, |
| { |
| "epoch": 2.39460370994941, |
| "grad_norm": 0.2699939342316147, |
| "learning_rate": 1.1211742660836977e-05, |
| "loss": 0.2802, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.396290050590219, |
| "grad_norm": 0.2509867604822763, |
| "learning_rate": 1.1180512179887572e-05, |
| "loss": 0.2798, |
| "step": 1421 |
| }, |
| { |
| "epoch": 2.397976391231029, |
| "grad_norm": 0.22384337619313563, |
| "learning_rate": 1.1149281698938164e-05, |
| "loss": 0.2812, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.399662731871838, |
| "grad_norm": 0.25255278624393845, |
| "learning_rate": 1.1118051217988758e-05, |
| "loss": 0.289, |
| "step": 1423 |
| }, |
| { |
| "epoch": 2.4013490725126476, |
| "grad_norm": 0.23643509840246096, |
| "learning_rate": 1.1086820737039351e-05, |
| "loss": 0.2838, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.403035413153457, |
| "grad_norm": 0.23374305341737317, |
| "learning_rate": 1.1055590256089945e-05, |
| "loss": 0.2862, |
| "step": 1425 |
| }, |
| { |
| "epoch": 2.4047217537942664, |
| "grad_norm": 0.2319544136431847, |
| "learning_rate": 1.1024359775140536e-05, |
| "loss": 0.2896, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.4064080944350756, |
| "grad_norm": 0.24022760550379593, |
| "learning_rate": 1.099312929419113e-05, |
| "loss": 0.2902, |
| "step": 1427 |
| }, |
| { |
| "epoch": 2.4080944350758853, |
| "grad_norm": 0.23074596259534727, |
| "learning_rate": 1.0961898813241725e-05, |
| "loss": 0.2773, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.409780775716695, |
| "grad_norm": 0.2676398026066761, |
| "learning_rate": 1.0930668332292319e-05, |
| "loss": 0.2986, |
| "step": 1429 |
| }, |
| { |
| "epoch": 2.411467116357504, |
| "grad_norm": 0.2812523688455711, |
| "learning_rate": 1.089943785134291e-05, |
| "loss": 0.2879, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.4131534569983137, |
| "grad_norm": 0.23002037986702084, |
| "learning_rate": 1.0868207370393504e-05, |
| "loss": 0.2998, |
| "step": 1431 |
| }, |
| { |
| "epoch": 2.414839797639123, |
| "grad_norm": 0.21862223377159512, |
| "learning_rate": 1.0836976889444098e-05, |
| "loss": 0.2771, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.4165261382799326, |
| "grad_norm": 0.29976235610573854, |
| "learning_rate": 1.0805746408494691e-05, |
| "loss": 0.2829, |
| "step": 1433 |
| }, |
| { |
| "epoch": 2.4182124789207418, |
| "grad_norm": 0.21791369459789725, |
| "learning_rate": 1.0774515927545285e-05, |
| "loss": 0.2783, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.4198988195615514, |
| "grad_norm": 0.23823404787623872, |
| "learning_rate": 1.0743285446595878e-05, |
| "loss": 0.2882, |
| "step": 1435 |
| }, |
| { |
| "epoch": 2.421585160202361, |
| "grad_norm": 0.2217777512693731, |
| "learning_rate": 1.0712054965646472e-05, |
| "loss": 0.2745, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.4232715008431702, |
| "grad_norm": 0.23152128478395884, |
| "learning_rate": 1.0680824484697066e-05, |
| "loss": 0.2792, |
| "step": 1437 |
| }, |
| { |
| "epoch": 2.42495784148398, |
| "grad_norm": 0.2551072037505355, |
| "learning_rate": 1.0649594003747657e-05, |
| "loss": 0.2795, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.426644182124789, |
| "grad_norm": 0.25243262292499147, |
| "learning_rate": 1.0618363522798251e-05, |
| "loss": 0.2864, |
| "step": 1439 |
| }, |
| { |
| "epoch": 2.4283305227655987, |
| "grad_norm": 0.2401318261662841, |
| "learning_rate": 1.0587133041848844e-05, |
| "loss": 0.2749, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.430016863406408, |
| "grad_norm": 0.2482249657282476, |
| "learning_rate": 1.0555902560899438e-05, |
| "loss": 0.2844, |
| "step": 1441 |
| }, |
| { |
| "epoch": 2.4317032040472175, |
| "grad_norm": 0.23941825406366568, |
| "learning_rate": 1.0524672079950032e-05, |
| "loss": 0.2783, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.433389544688027, |
| "grad_norm": 0.24014673070086756, |
| "learning_rate": 1.0493441599000625e-05, |
| "loss": 0.2876, |
| "step": 1443 |
| }, |
| { |
| "epoch": 2.4350758853288363, |
| "grad_norm": 0.21924519731054826, |
| "learning_rate": 1.0462211118051219e-05, |
| "loss": 0.2821, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.436762225969646, |
| "grad_norm": 0.27891282450208976, |
| "learning_rate": 1.0430980637101812e-05, |
| "loss": 0.2887, |
| "step": 1445 |
| }, |
| { |
| "epoch": 2.438448566610455, |
| "grad_norm": 0.2344618379707951, |
| "learning_rate": 1.0399750156152404e-05, |
| "loss": 0.2841, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.440134907251265, |
| "grad_norm": 0.21563837277257256, |
| "learning_rate": 1.0368519675203e-05, |
| "loss": 0.2766, |
| "step": 1447 |
| }, |
| { |
| "epoch": 2.441821247892074, |
| "grad_norm": 0.22943894425867237, |
| "learning_rate": 1.0337289194253593e-05, |
| "loss": 0.2845, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.4435075885328836, |
| "grad_norm": 0.20797197636256856, |
| "learning_rate": 1.0306058713304185e-05, |
| "loss": 0.2725, |
| "step": 1449 |
| }, |
| { |
| "epoch": 2.4451939291736933, |
| "grad_norm": 0.24047436221214155, |
| "learning_rate": 1.0274828232354778e-05, |
| "loss": 0.2794, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.4468802698145025, |
| "grad_norm": 0.23419204597117435, |
| "learning_rate": 1.0243597751405372e-05, |
| "loss": 0.2906, |
| "step": 1451 |
| }, |
| { |
| "epoch": 2.448566610455312, |
| "grad_norm": 0.23568491888175902, |
| "learning_rate": 1.0212367270455965e-05, |
| "loss": 0.2642, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.4502529510961213, |
| "grad_norm": 0.220781160618401, |
| "learning_rate": 1.0181136789506559e-05, |
| "loss": 0.2955, |
| "step": 1453 |
| }, |
| { |
| "epoch": 2.451939291736931, |
| "grad_norm": 0.23979617613977305, |
| "learning_rate": 1.0149906308557152e-05, |
| "loss": 0.2782, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.45362563237774, |
| "grad_norm": 0.23607145873020913, |
| "learning_rate": 1.0118675827607746e-05, |
| "loss": 0.2664, |
| "step": 1455 |
| }, |
| { |
| "epoch": 2.4553119730185498, |
| "grad_norm": 0.22391814814744884, |
| "learning_rate": 1.008744534665834e-05, |
| "loss": 0.2691, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.4569983136593594, |
| "grad_norm": 0.25227619396729256, |
| "learning_rate": 1.0056214865708931e-05, |
| "loss": 0.283, |
| "step": 1457 |
| }, |
| { |
| "epoch": 2.4586846543001686, |
| "grad_norm": 0.23572480074334606, |
| "learning_rate": 1.0024984384759525e-05, |
| "loss": 0.2836, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.460370994940978, |
| "grad_norm": 0.2338220636711118, |
| "learning_rate": 9.993753903810119e-06, |
| "loss": 0.2899, |
| "step": 1459 |
| }, |
| { |
| "epoch": 2.4620573355817874, |
| "grad_norm": 0.23295208691141953, |
| "learning_rate": 9.962523422860714e-06, |
| "loss": 0.2945, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.463743676222597, |
| "grad_norm": 0.21976085069131715, |
| "learning_rate": 9.931292941911306e-06, |
| "loss": 0.2848, |
| "step": 1461 |
| }, |
| { |
| "epoch": 2.4654300168634062, |
| "grad_norm": 0.2222298647571462, |
| "learning_rate": 9.9000624609619e-06, |
| "loss": 0.2769, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.467116357504216, |
| "grad_norm": 0.2559508939465625, |
| "learning_rate": 9.868831980012493e-06, |
| "loss": 0.2937, |
| "step": 1463 |
| }, |
| { |
| "epoch": 2.4688026981450255, |
| "grad_norm": 0.24584859890488658, |
| "learning_rate": 9.837601499063086e-06, |
| "loss": 0.279, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.4704890387858347, |
| "grad_norm": 0.21859759501061693, |
| "learning_rate": 9.806371018113678e-06, |
| "loss": 0.2747, |
| "step": 1465 |
| }, |
| { |
| "epoch": 2.4721753794266443, |
| "grad_norm": 0.26054047978030376, |
| "learning_rate": 9.775140537164273e-06, |
| "loss": 0.2696, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.4738617200674535, |
| "grad_norm": 0.22573529466661918, |
| "learning_rate": 9.743910056214867e-06, |
| "loss": 0.2815, |
| "step": 1467 |
| }, |
| { |
| "epoch": 2.475548060708263, |
| "grad_norm": 0.22375838955077348, |
| "learning_rate": 9.71267957526546e-06, |
| "loss": 0.2776, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.4772344013490724, |
| "grad_norm": 0.22425707199526068, |
| "learning_rate": 9.681449094316052e-06, |
| "loss": 0.3007, |
| "step": 1469 |
| }, |
| { |
| "epoch": 2.478920741989882, |
| "grad_norm": 0.2372375809699635, |
| "learning_rate": 9.650218613366646e-06, |
| "loss": 0.2896, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.4806070826306916, |
| "grad_norm": 0.2209793719324253, |
| "learning_rate": 9.61898813241724e-06, |
| "loss": 0.271, |
| "step": 1471 |
| }, |
| { |
| "epoch": 2.482293423271501, |
| "grad_norm": 0.272178984484663, |
| "learning_rate": 9.587757651467833e-06, |
| "loss": 0.3081, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.4839797639123105, |
| "grad_norm": 0.21530565491503215, |
| "learning_rate": 9.556527170518427e-06, |
| "loss": 0.2682, |
| "step": 1473 |
| }, |
| { |
| "epoch": 2.4856661045531196, |
| "grad_norm": 0.24950371032482432, |
| "learning_rate": 9.52529668956902e-06, |
| "loss": 0.2631, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.4873524451939293, |
| "grad_norm": 0.2379116998099416, |
| "learning_rate": 9.494066208619614e-06, |
| "loss": 0.2853, |
| "step": 1475 |
| }, |
| { |
| "epoch": 2.4890387858347385, |
| "grad_norm": 0.21081103689169245, |
| "learning_rate": 9.462835727670207e-06, |
| "loss": 0.2698, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.490725126475548, |
| "grad_norm": 0.22323696045894834, |
| "learning_rate": 9.431605246720799e-06, |
| "loss": 0.2726, |
| "step": 1477 |
| }, |
| { |
| "epoch": 2.4924114671163577, |
| "grad_norm": 0.21955495173287082, |
| "learning_rate": 9.400374765771393e-06, |
| "loss": 0.2889, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.494097807757167, |
| "grad_norm": 0.23023530655803884, |
| "learning_rate": 9.369144284821988e-06, |
| "loss": 0.2911, |
| "step": 1479 |
| }, |
| { |
| "epoch": 2.4957841483979766, |
| "grad_norm": 0.25141930925593015, |
| "learning_rate": 9.337913803872581e-06, |
| "loss": 0.2859, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.4974704890387858, |
| "grad_norm": 0.20504591024273977, |
| "learning_rate": 9.306683322923173e-06, |
| "loss": 0.2837, |
| "step": 1481 |
| }, |
| { |
| "epoch": 2.4991568296795954, |
| "grad_norm": 0.21175138808057636, |
| "learning_rate": 9.275452841973767e-06, |
| "loss": 0.2842, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.5008431703204046, |
| "grad_norm": 0.25946223776656135, |
| "learning_rate": 9.24422236102436e-06, |
| "loss": 0.2818, |
| "step": 1483 |
| }, |
| { |
| "epoch": 2.5025295109612142, |
| "grad_norm": 0.24555783079918103, |
| "learning_rate": 9.212991880074954e-06, |
| "loss": 0.2775, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.504215851602024, |
| "grad_norm": 0.2509789172967366, |
| "learning_rate": 9.181761399125546e-06, |
| "loss": 0.2847, |
| "step": 1485 |
| }, |
| { |
| "epoch": 2.505902192242833, |
| "grad_norm": 0.2219211199136465, |
| "learning_rate": 9.150530918176141e-06, |
| "loss": 0.275, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.5075885328836423, |
| "grad_norm": 0.2363255237521708, |
| "learning_rate": 9.119300437226734e-06, |
| "loss": 0.2836, |
| "step": 1487 |
| }, |
| { |
| "epoch": 2.509274873524452, |
| "grad_norm": 0.22035522503306615, |
| "learning_rate": 9.088069956277328e-06, |
| "loss": 0.2711, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.5109612141652615, |
| "grad_norm": 0.2363162187055854, |
| "learning_rate": 9.05683947532792e-06, |
| "loss": 0.2883, |
| "step": 1489 |
| }, |
| { |
| "epoch": 2.5126475548060707, |
| "grad_norm": 0.22924926722663613, |
| "learning_rate": 9.025608994378513e-06, |
| "loss": 0.2746, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.5143338954468804, |
| "grad_norm": 0.21468666781090634, |
| "learning_rate": 8.994378513429107e-06, |
| "loss": 0.2732, |
| "step": 1491 |
| }, |
| { |
| "epoch": 2.51602023608769, |
| "grad_norm": 0.22910744845776243, |
| "learning_rate": 8.9631480324797e-06, |
| "loss": 0.2888, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.517706576728499, |
| "grad_norm": 0.22362122867425577, |
| "learning_rate": 8.931917551530294e-06, |
| "loss": 0.2822, |
| "step": 1493 |
| }, |
| { |
| "epoch": 2.5193929173693084, |
| "grad_norm": 0.21554412005349557, |
| "learning_rate": 8.900687070580888e-06, |
| "loss": 0.2767, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.521079258010118, |
| "grad_norm": 0.2393134291816071, |
| "learning_rate": 8.869456589631481e-06, |
| "loss": 0.2859, |
| "step": 1495 |
| }, |
| { |
| "epoch": 2.5227655986509276, |
| "grad_norm": 0.2089083058882561, |
| "learning_rate": 8.838226108682075e-06, |
| "loss": 0.2737, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.524451939291737, |
| "grad_norm": 0.2241838198671693, |
| "learning_rate": 8.806995627732667e-06, |
| "loss": 0.2812, |
| "step": 1497 |
| }, |
| { |
| "epoch": 2.5261382799325465, |
| "grad_norm": 0.2161714545573455, |
| "learning_rate": 8.77576514678326e-06, |
| "loss": 0.299, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.5278246205733557, |
| "grad_norm": 0.22817866005045112, |
| "learning_rate": 8.744534665833855e-06, |
| "loss": 0.2816, |
| "step": 1499 |
| }, |
| { |
| "epoch": 2.5295109612141653, |
| "grad_norm": 0.2286712930717227, |
| "learning_rate": 8.713304184884447e-06, |
| "loss": 0.2929, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.5311973018549745, |
| "grad_norm": 0.19790624556774256, |
| "learning_rate": 8.68207370393504e-06, |
| "loss": 0.2768, |
| "step": 1501 |
| }, |
| { |
| "epoch": 2.532883642495784, |
| "grad_norm": 0.20099031176210763, |
| "learning_rate": 8.650843222985634e-06, |
| "loss": 0.2865, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.5345699831365938, |
| "grad_norm": 0.2283654562744821, |
| "learning_rate": 8.619612742036228e-06, |
| "loss": 0.2828, |
| "step": 1503 |
| }, |
| { |
| "epoch": 2.536256323777403, |
| "grad_norm": 0.20017848416510967, |
| "learning_rate": 8.58838226108682e-06, |
| "loss": 0.2858, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.5379426644182126, |
| "grad_norm": 0.20782530100569868, |
| "learning_rate": 8.557151780137415e-06, |
| "loss": 0.2762, |
| "step": 1505 |
| }, |
| { |
| "epoch": 2.539629005059022, |
| "grad_norm": 0.21768734755599212, |
| "learning_rate": 8.525921299188009e-06, |
| "loss": 0.2769, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.5413153456998314, |
| "grad_norm": 0.20221135790652545, |
| "learning_rate": 8.494690818238602e-06, |
| "loss": 0.2764, |
| "step": 1507 |
| }, |
| { |
| "epoch": 2.5430016863406406, |
| "grad_norm": 0.223661558640449, |
| "learning_rate": 8.463460337289194e-06, |
| "loss": 0.292, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.5446880269814502, |
| "grad_norm": 0.21107262757905626, |
| "learning_rate": 8.432229856339787e-06, |
| "loss": 0.2904, |
| "step": 1509 |
| }, |
| { |
| "epoch": 2.54637436762226, |
| "grad_norm": 0.21086031494965404, |
| "learning_rate": 8.400999375390381e-06, |
| "loss": 0.2843, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.548060708263069, |
| "grad_norm": 0.20215401518486265, |
| "learning_rate": 8.369768894440975e-06, |
| "loss": 0.2814, |
| "step": 1511 |
| }, |
| { |
| "epoch": 2.5497470489038787, |
| "grad_norm": 0.2322780280605853, |
| "learning_rate": 8.338538413491568e-06, |
| "loss": 0.3128, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.551433389544688, |
| "grad_norm": 0.21501337510584598, |
| "learning_rate": 8.307307932542162e-06, |
| "loss": 0.2956, |
| "step": 1513 |
| }, |
| { |
| "epoch": 2.5531197301854975, |
| "grad_norm": 0.23922833409027972, |
| "learning_rate": 8.276077451592755e-06, |
| "loss": 0.299, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.5548060708263067, |
| "grad_norm": 0.21327700733730867, |
| "learning_rate": 8.244846970643349e-06, |
| "loss": 0.2805, |
| "step": 1515 |
| }, |
| { |
| "epoch": 2.5564924114671164, |
| "grad_norm": 0.2177529625977393, |
| "learning_rate": 8.21361648969394e-06, |
| "loss": 0.2954, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.558178752107926, |
| "grad_norm": 0.2174531141885469, |
| "learning_rate": 8.182386008744534e-06, |
| "loss": 0.2812, |
| "step": 1517 |
| }, |
| { |
| "epoch": 2.559865092748735, |
| "grad_norm": 0.24011443306706076, |
| "learning_rate": 8.15115552779513e-06, |
| "loss": 0.2847, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.561551433389545, |
| "grad_norm": 0.2211842789290804, |
| "learning_rate": 8.119925046845723e-06, |
| "loss": 0.2968, |
| "step": 1519 |
| }, |
| { |
| "epoch": 2.563237774030354, |
| "grad_norm": 0.2027166936230016, |
| "learning_rate": 8.088694565896315e-06, |
| "loss": 0.2901, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.5649241146711637, |
| "grad_norm": 0.23143090522204798, |
| "learning_rate": 8.057464084946908e-06, |
| "loss": 0.3004, |
| "step": 1521 |
| }, |
| { |
| "epoch": 2.566610455311973, |
| "grad_norm": 0.21405158236348512, |
| "learning_rate": 8.026233603997502e-06, |
| "loss": 0.2771, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.5682967959527825, |
| "grad_norm": 0.25973051681141873, |
| "learning_rate": 7.995003123048095e-06, |
| "loss": 0.2773, |
| "step": 1523 |
| }, |
| { |
| "epoch": 2.569983136593592, |
| "grad_norm": 0.23015760509575306, |
| "learning_rate": 7.963772642098687e-06, |
| "loss": 0.2954, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.5716694772344013, |
| "grad_norm": 0.20994696566124324, |
| "learning_rate": 7.932542161149283e-06, |
| "loss": 0.2817, |
| "step": 1525 |
| }, |
| { |
| "epoch": 2.573355817875211, |
| "grad_norm": 0.254010145129785, |
| "learning_rate": 7.901311680199876e-06, |
| "loss": 0.2955, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.57504215851602, |
| "grad_norm": 0.24545591443052275, |
| "learning_rate": 7.87008119925047e-06, |
| "loss": 0.2803, |
| "step": 1527 |
| }, |
| { |
| "epoch": 2.5767284991568298, |
| "grad_norm": 0.21517156771568222, |
| "learning_rate": 7.838850718301062e-06, |
| "loss": 0.2876, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.578414839797639, |
| "grad_norm": 0.22365576198174414, |
| "learning_rate": 7.807620237351655e-06, |
| "loss": 0.2877, |
| "step": 1529 |
| }, |
| { |
| "epoch": 2.5801011804384486, |
| "grad_norm": 0.21885168775468924, |
| "learning_rate": 7.776389756402249e-06, |
| "loss": 0.2779, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.5817875210792582, |
| "grad_norm": 0.21740253104686508, |
| "learning_rate": 7.745159275452842e-06, |
| "loss": 0.2813, |
| "step": 1531 |
| }, |
| { |
| "epoch": 2.5834738617200674, |
| "grad_norm": 0.20214237553711495, |
| "learning_rate": 7.713928794503436e-06, |
| "loss": 0.2882, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.5851602023608766, |
| "grad_norm": 0.23227209112665623, |
| "learning_rate": 7.68269831355403e-06, |
| "loss": 0.309, |
| "step": 1533 |
| }, |
| { |
| "epoch": 2.5868465430016863, |
| "grad_norm": 0.21669129119869118, |
| "learning_rate": 7.651467832604623e-06, |
| "loss": 0.2893, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.588532883642496, |
| "grad_norm": 0.20972553341344566, |
| "learning_rate": 7.6202373516552155e-06, |
| "loss": 0.2829, |
| "step": 1535 |
| }, |
| { |
| "epoch": 2.590219224283305, |
| "grad_norm": 0.21727304295862815, |
| "learning_rate": 7.589006870705809e-06, |
| "loss": 0.27, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.5919055649241147, |
| "grad_norm": 0.20374504478680835, |
| "learning_rate": 7.5577763897564035e-06, |
| "loss": 0.2908, |
| "step": 1537 |
| }, |
| { |
| "epoch": 2.5935919055649244, |
| "grad_norm": 0.2066348649118724, |
| "learning_rate": 7.526545908806996e-06, |
| "loss": 0.2761, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.5952782462057336, |
| "grad_norm": 0.20726056661289594, |
| "learning_rate": 7.49531542785759e-06, |
| "loss": 0.2796, |
| "step": 1539 |
| }, |
| { |
| "epoch": 2.5969645868465427, |
| "grad_norm": 0.24056296732359322, |
| "learning_rate": 7.4640849469081824e-06, |
| "loss": 0.2934, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.5986509274873524, |
| "grad_norm": 0.20786597164807977, |
| "learning_rate": 7.432854465958776e-06, |
| "loss": 0.2743, |
| "step": 1541 |
| }, |
| { |
| "epoch": 2.600337268128162, |
| "grad_norm": 0.2264554556337058, |
| "learning_rate": 7.401623985009369e-06, |
| "loss": 0.2799, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.602023608768971, |
| "grad_norm": 0.20964648985122444, |
| "learning_rate": 7.370393504059962e-06, |
| "loss": 0.2738, |
| "step": 1543 |
| }, |
| { |
| "epoch": 2.603709949409781, |
| "grad_norm": 0.2288453161556054, |
| "learning_rate": 7.339163023110557e-06, |
| "loss": 0.2847, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.6053962900505905, |
| "grad_norm": 0.21159917871195397, |
| "learning_rate": 7.30793254216115e-06, |
| "loss": 0.2812, |
| "step": 1545 |
| }, |
| { |
| "epoch": 2.6070826306913997, |
| "grad_norm": 0.2069094857828839, |
| "learning_rate": 7.276702061211743e-06, |
| "loss": 0.2931, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.608768971332209, |
| "grad_norm": 0.21060482294692173, |
| "learning_rate": 7.2454715802623364e-06, |
| "loss": 0.2731, |
| "step": 1547 |
| }, |
| { |
| "epoch": 2.6104553119730185, |
| "grad_norm": 0.25613020925535795, |
| "learning_rate": 7.214241099312929e-06, |
| "loss": 0.2943, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.612141652613828, |
| "grad_norm": 0.24576894427795132, |
| "learning_rate": 7.183010618363523e-06, |
| "loss": 0.2913, |
| "step": 1549 |
| }, |
| { |
| "epoch": 2.6138279932546373, |
| "grad_norm": 0.21700719337456373, |
| "learning_rate": 7.151780137414117e-06, |
| "loss": 0.2944, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.615514333895447, |
| "grad_norm": 0.18957741799967104, |
| "learning_rate": 7.120549656464711e-06, |
| "loss": 0.2724, |
| "step": 1551 |
| }, |
| { |
| "epoch": 2.6172006745362566, |
| "grad_norm": 0.2110565071188753, |
| "learning_rate": 7.089319175515303e-06, |
| "loss": 0.285, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.618887015177066, |
| "grad_norm": 0.23081971460825607, |
| "learning_rate": 7.058088694565897e-06, |
| "loss": 0.2837, |
| "step": 1553 |
| }, |
| { |
| "epoch": 2.620573355817875, |
| "grad_norm": 0.2570191985673241, |
| "learning_rate": 7.02685821361649e-06, |
| "loss": 0.2861, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.6222596964586846, |
| "grad_norm": 0.22950739326313954, |
| "learning_rate": 6.995627732667083e-06, |
| "loss": 0.2868, |
| "step": 1555 |
| }, |
| { |
| "epoch": 2.6239460370994943, |
| "grad_norm": 0.21988490556961662, |
| "learning_rate": 6.964397251717676e-06, |
| "loss": 0.3013, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.6256323777403034, |
| "grad_norm": 0.21632940269907694, |
| "learning_rate": 6.933166770768271e-06, |
| "loss": 0.2655, |
| "step": 1557 |
| }, |
| { |
| "epoch": 2.627318718381113, |
| "grad_norm": 0.23015336720772667, |
| "learning_rate": 6.901936289818864e-06, |
| "loss": 0.2783, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.6290050590219223, |
| "grad_norm": 0.23068020817280607, |
| "learning_rate": 6.870705808869457e-06, |
| "loss": 0.2933, |
| "step": 1559 |
| }, |
| { |
| "epoch": 2.630691399662732, |
| "grad_norm": 0.22117581616378715, |
| "learning_rate": 6.83947532792005e-06, |
| "loss": 0.2891, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.632377740303541, |
| "grad_norm": 0.19747851535133804, |
| "learning_rate": 6.8082448469706436e-06, |
| "loss": 0.2675, |
| "step": 1561 |
| }, |
| { |
| "epoch": 2.6340640809443507, |
| "grad_norm": 0.23789835021633737, |
| "learning_rate": 6.777014366021236e-06, |
| "loss": 0.2628, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.6357504215851604, |
| "grad_norm": 0.24583892966645363, |
| "learning_rate": 6.745783885071831e-06, |
| "loss": 0.2888, |
| "step": 1563 |
| }, |
| { |
| "epoch": 2.6374367622259696, |
| "grad_norm": 0.23938417280190757, |
| "learning_rate": 6.714553404122424e-06, |
| "loss": 0.2979, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.639123102866779, |
| "grad_norm": 0.21557522746413682, |
| "learning_rate": 6.683322923173018e-06, |
| "loss": 0.2879, |
| "step": 1565 |
| }, |
| { |
| "epoch": 2.6408094435075884, |
| "grad_norm": 0.2237697418435905, |
| "learning_rate": 6.6520924422236105e-06, |
| "loss": 0.282, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.642495784148398, |
| "grad_norm": 0.2502379166017792, |
| "learning_rate": 6.620861961274204e-06, |
| "loss": 0.2756, |
| "step": 1567 |
| }, |
| { |
| "epoch": 2.6441821247892072, |
| "grad_norm": 0.22915242171625275, |
| "learning_rate": 6.589631480324797e-06, |
| "loss": 0.2903, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.645868465430017, |
| "grad_norm": 0.21731400403245665, |
| "learning_rate": 6.55840099937539e-06, |
| "loss": 0.2805, |
| "step": 1569 |
| }, |
| { |
| "epoch": 2.6475548060708265, |
| "grad_norm": 0.21670425873688098, |
| "learning_rate": 6.527170518425985e-06, |
| "loss": 0.2753, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.6492411467116357, |
| "grad_norm": 0.2451135117586469, |
| "learning_rate": 6.495940037476577e-06, |
| "loss": 0.2703, |
| "step": 1571 |
| }, |
| { |
| "epoch": 2.6509274873524453, |
| "grad_norm": 0.24833441155424285, |
| "learning_rate": 6.464709556527171e-06, |
| "loss": 0.2837, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.6526138279932545, |
| "grad_norm": 0.21463588266906852, |
| "learning_rate": 6.4334790755777645e-06, |
| "loss": 0.2874, |
| "step": 1573 |
| }, |
| { |
| "epoch": 2.654300168634064, |
| "grad_norm": 0.214244480177014, |
| "learning_rate": 6.402248594628357e-06, |
| "loss": 0.2833, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.6559865092748733, |
| "grad_norm": 0.20655998209421197, |
| "learning_rate": 6.371018113678951e-06, |
| "loss": 0.2763, |
| "step": 1575 |
| }, |
| { |
| "epoch": 2.657672849915683, |
| "grad_norm": 0.2115837356367195, |
| "learning_rate": 6.339787632729545e-06, |
| "loss": 0.2984, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.6593591905564926, |
| "grad_norm": 0.21949993697455955, |
| "learning_rate": 6.308557151780138e-06, |
| "loss": 0.31, |
| "step": 1577 |
| }, |
| { |
| "epoch": 2.661045531197302, |
| "grad_norm": 0.21648433072948026, |
| "learning_rate": 6.277326670830731e-06, |
| "loss": 0.2699, |
| "step": 1578 |
| }, |
| { |
| "epoch": 2.6627318718381114, |
| "grad_norm": 0.2086925321556406, |
| "learning_rate": 6.246096189881324e-06, |
| "loss": 0.2871, |
| "step": 1579 |
| }, |
| { |
| "epoch": 2.6644182124789206, |
| "grad_norm": 0.21713294641125344, |
| "learning_rate": 6.214865708931918e-06, |
| "loss": 0.2755, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.6661045531197303, |
| "grad_norm": 0.2122874249893408, |
| "learning_rate": 6.183635227982511e-06, |
| "loss": 0.2784, |
| "step": 1581 |
| }, |
| { |
| "epoch": 2.6677908937605395, |
| "grad_norm": 0.21600483492813047, |
| "learning_rate": 6.152404747033105e-06, |
| "loss": 0.2928, |
| "step": 1582 |
| }, |
| { |
| "epoch": 2.669477234401349, |
| "grad_norm": 0.23268700923125246, |
| "learning_rate": 6.121174266083697e-06, |
| "loss": 0.3, |
| "step": 1583 |
| }, |
| { |
| "epoch": 2.6711635750421587, |
| "grad_norm": 0.217511442703724, |
| "learning_rate": 6.089943785134292e-06, |
| "loss": 0.2661, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.672849915682968, |
| "grad_norm": 0.22021101235547547, |
| "learning_rate": 6.0587133041848845e-06, |
| "loss": 0.3002, |
| "step": 1585 |
| }, |
| { |
| "epoch": 2.6745362563237776, |
| "grad_norm": 0.2227726635451435, |
| "learning_rate": 6.027482823235478e-06, |
| "loss": 0.2667, |
| "step": 1586 |
| }, |
| { |
| "epoch": 2.6762225969645868, |
| "grad_norm": 0.19704649629300044, |
| "learning_rate": 5.996252342286072e-06, |
| "loss": 0.2726, |
| "step": 1587 |
| }, |
| { |
| "epoch": 2.6779089376053964, |
| "grad_norm": 0.22161664573034123, |
| "learning_rate": 5.965021861336665e-06, |
| "loss": 0.2889, |
| "step": 1588 |
| }, |
| { |
| "epoch": 2.6795952782462056, |
| "grad_norm": 0.2123347602164092, |
| "learning_rate": 5.933791380387258e-06, |
| "loss": 0.2887, |
| "step": 1589 |
| }, |
| { |
| "epoch": 2.681281618887015, |
| "grad_norm": 0.21222045114230656, |
| "learning_rate": 5.902560899437851e-06, |
| "loss": 0.2812, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.682967959527825, |
| "grad_norm": 0.205428838759869, |
| "learning_rate": 5.871330418488445e-06, |
| "loss": 0.2819, |
| "step": 1591 |
| }, |
| { |
| "epoch": 2.684654300168634, |
| "grad_norm": 0.22794111641253004, |
| "learning_rate": 5.8400999375390385e-06, |
| "loss": 0.294, |
| "step": 1592 |
| }, |
| { |
| "epoch": 2.6863406408094432, |
| "grad_norm": 0.2188799104236607, |
| "learning_rate": 5.808869456589631e-06, |
| "loss": 0.2908, |
| "step": 1593 |
| }, |
| { |
| "epoch": 2.688026981450253, |
| "grad_norm": 0.22322959770358233, |
| "learning_rate": 5.777638975640226e-06, |
| "loss": 0.2761, |
| "step": 1594 |
| }, |
| { |
| "epoch": 2.6897133220910625, |
| "grad_norm": 0.21789366885242073, |
| "learning_rate": 5.746408494690818e-06, |
| "loss": 0.2927, |
| "step": 1595 |
| }, |
| { |
| "epoch": 2.6913996627318717, |
| "grad_norm": 0.2083701451315594, |
| "learning_rate": 5.715178013741412e-06, |
| "loss": 0.278, |
| "step": 1596 |
| }, |
| { |
| "epoch": 2.6930860033726813, |
| "grad_norm": 0.22030127660422125, |
| "learning_rate": 5.683947532792005e-06, |
| "loss": 0.2869, |
| "step": 1597 |
| }, |
| { |
| "epoch": 2.694772344013491, |
| "grad_norm": 0.21709081444096415, |
| "learning_rate": 5.652717051842599e-06, |
| "loss": 0.2861, |
| "step": 1598 |
| }, |
| { |
| "epoch": 2.6964586846543, |
| "grad_norm": 0.3235206989069253, |
| "learning_rate": 5.621486570893192e-06, |
| "loss": 0.3022, |
| "step": 1599 |
| }, |
| { |
| "epoch": 2.6981450252951094, |
| "grad_norm": 0.2187203609037221, |
| "learning_rate": 5.590256089943786e-06, |
| "loss": 0.2777, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.699831365935919, |
| "grad_norm": 0.21029848278998103, |
| "learning_rate": 5.559025608994379e-06, |
| "loss": 0.2814, |
| "step": 1601 |
| }, |
| { |
| "epoch": 2.7015177065767286, |
| "grad_norm": 0.23673420516168278, |
| "learning_rate": 5.527795128044972e-06, |
| "loss": 0.2869, |
| "step": 1602 |
| }, |
| { |
| "epoch": 2.703204047217538, |
| "grad_norm": 0.20215563439209264, |
| "learning_rate": 5.496564647095565e-06, |
| "loss": 0.2758, |
| "step": 1603 |
| }, |
| { |
| "epoch": 2.7048903878583475, |
| "grad_norm": 0.2026247743653848, |
| "learning_rate": 5.465334166146159e-06, |
| "loss": 0.2708, |
| "step": 1604 |
| }, |
| { |
| "epoch": 2.706576728499157, |
| "grad_norm": 0.19435807754322892, |
| "learning_rate": 5.434103685196752e-06, |
| "loss": 0.2853, |
| "step": 1605 |
| }, |
| { |
| "epoch": 2.7082630691399663, |
| "grad_norm": 0.2053478996815802, |
| "learning_rate": 5.402873204247346e-06, |
| "loss": 0.3004, |
| "step": 1606 |
| }, |
| { |
| "epoch": 2.7099494097807755, |
| "grad_norm": 0.2202873897430332, |
| "learning_rate": 5.371642723297939e-06, |
| "loss": 0.2855, |
| "step": 1607 |
| }, |
| { |
| "epoch": 2.711635750421585, |
| "grad_norm": 0.20676180805282937, |
| "learning_rate": 5.340412242348533e-06, |
| "loss": 0.2808, |
| "step": 1608 |
| }, |
| { |
| "epoch": 2.7133220910623947, |
| "grad_norm": 0.1941446750107508, |
| "learning_rate": 5.3091817613991255e-06, |
| "loss": 0.2749, |
| "step": 1609 |
| }, |
| { |
| "epoch": 2.715008431703204, |
| "grad_norm": 0.19889620088958468, |
| "learning_rate": 5.277951280449719e-06, |
| "loss": 0.2782, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.7166947723440136, |
| "grad_norm": 0.2205070199105159, |
| "learning_rate": 5.2467207995003126e-06, |
| "loss": 0.2835, |
| "step": 1611 |
| }, |
| { |
| "epoch": 2.718381112984823, |
| "grad_norm": 0.20007123638846067, |
| "learning_rate": 5.215490318550906e-06, |
| "loss": 0.2969, |
| "step": 1612 |
| }, |
| { |
| "epoch": 2.7200674536256324, |
| "grad_norm": 0.2089199986979393, |
| "learning_rate": 5.1842598376015e-06, |
| "loss": 0.297, |
| "step": 1613 |
| }, |
| { |
| "epoch": 2.7217537942664416, |
| "grad_norm": 0.20191998141386186, |
| "learning_rate": 5.153029356652092e-06, |
| "loss": 0.2935, |
| "step": 1614 |
| }, |
| { |
| "epoch": 2.7234401349072512, |
| "grad_norm": 0.19688379446144275, |
| "learning_rate": 5.121798875702686e-06, |
| "loss": 0.2839, |
| "step": 1615 |
| }, |
| { |
| "epoch": 2.725126475548061, |
| "grad_norm": 0.21330583947097048, |
| "learning_rate": 5.0905683947532795e-06, |
| "loss": 0.2713, |
| "step": 1616 |
| }, |
| { |
| "epoch": 2.72681281618887, |
| "grad_norm": 0.21573383699863047, |
| "learning_rate": 5.059337913803873e-06, |
| "loss": 0.2945, |
| "step": 1617 |
| }, |
| { |
| "epoch": 2.7284991568296797, |
| "grad_norm": 0.20264899837278788, |
| "learning_rate": 5.028107432854466e-06, |
| "loss": 0.2818, |
| "step": 1618 |
| }, |
| { |
| "epoch": 2.730185497470489, |
| "grad_norm": 0.21608446927756567, |
| "learning_rate": 4.996876951905059e-06, |
| "loss": 0.2864, |
| "step": 1619 |
| }, |
| { |
| "epoch": 2.7318718381112985, |
| "grad_norm": 0.20370501968918966, |
| "learning_rate": 4.965646470955653e-06, |
| "loss": 0.2703, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.7335581787521077, |
| "grad_norm": 0.2080714326636247, |
| "learning_rate": 4.934415990006246e-06, |
| "loss": 0.292, |
| "step": 1621 |
| }, |
| { |
| "epoch": 2.7352445193929174, |
| "grad_norm": 0.2032840750089152, |
| "learning_rate": 4.903185509056839e-06, |
| "loss": 0.2804, |
| "step": 1622 |
| }, |
| { |
| "epoch": 2.736930860033727, |
| "grad_norm": 0.19331776690610536, |
| "learning_rate": 4.8719550281074335e-06, |
| "loss": 0.2755, |
| "step": 1623 |
| }, |
| { |
| "epoch": 2.738617200674536, |
| "grad_norm": 0.21693868552887213, |
| "learning_rate": 4.840724547158026e-06, |
| "loss": 0.2834, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.740303541315346, |
| "grad_norm": 0.20201398659734215, |
| "learning_rate": 4.80949406620862e-06, |
| "loss": 0.281, |
| "step": 1625 |
| }, |
| { |
| "epoch": 2.741989881956155, |
| "grad_norm": 0.20817935225884962, |
| "learning_rate": 4.778263585259213e-06, |
| "loss": 0.276, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.7436762225969646, |
| "grad_norm": 0.22331742883655747, |
| "learning_rate": 4.747033104309807e-06, |
| "loss": 0.2868, |
| "step": 1627 |
| }, |
| { |
| "epoch": 2.745362563237774, |
| "grad_norm": 0.20820921113077337, |
| "learning_rate": 4.7158026233603995e-06, |
| "loss": 0.2806, |
| "step": 1628 |
| }, |
| { |
| "epoch": 2.7470489038785835, |
| "grad_norm": 0.20234625534827635, |
| "learning_rate": 4.684572142410994e-06, |
| "loss": 0.2799, |
| "step": 1629 |
| }, |
| { |
| "epoch": 2.748735244519393, |
| "grad_norm": 0.2117057816531713, |
| "learning_rate": 4.653341661461587e-06, |
| "loss": 0.2959, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.7504215851602023, |
| "grad_norm": 0.21119392054935449, |
| "learning_rate": 4.62211118051218e-06, |
| "loss": 0.2968, |
| "step": 1631 |
| }, |
| { |
| "epoch": 2.752107925801012, |
| "grad_norm": 0.20784201095611002, |
| "learning_rate": 4.590880699562773e-06, |
| "loss": 0.2849, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.753794266441821, |
| "grad_norm": 0.2069161064249693, |
| "learning_rate": 4.559650218613367e-06, |
| "loss": 0.2809, |
| "step": 1633 |
| }, |
| { |
| "epoch": 2.7554806070826308, |
| "grad_norm": 0.2216730996786303, |
| "learning_rate": 4.52841973766396e-06, |
| "loss": 0.2831, |
| "step": 1634 |
| }, |
| { |
| "epoch": 2.75716694772344, |
| "grad_norm": 0.21586354572013416, |
| "learning_rate": 4.4971892567145535e-06, |
| "loss": 0.2856, |
| "step": 1635 |
| }, |
| { |
| "epoch": 2.7588532883642496, |
| "grad_norm": 0.20826728778824785, |
| "learning_rate": 4.465958775765147e-06, |
| "loss": 0.2884, |
| "step": 1636 |
| }, |
| { |
| "epoch": 2.7605396290050592, |
| "grad_norm": 0.1934541402293032, |
| "learning_rate": 4.434728294815741e-06, |
| "loss": 0.2877, |
| "step": 1637 |
| }, |
| { |
| "epoch": 2.7622259696458684, |
| "grad_norm": 0.19440484337210331, |
| "learning_rate": 4.403497813866333e-06, |
| "loss": 0.2865, |
| "step": 1638 |
| }, |
| { |
| "epoch": 2.763912310286678, |
| "grad_norm": 0.20329351934904272, |
| "learning_rate": 4.372267332916928e-06, |
| "loss": 0.2805, |
| "step": 1639 |
| }, |
| { |
| "epoch": 2.7655986509274872, |
| "grad_norm": 0.20682642131681114, |
| "learning_rate": 4.34103685196752e-06, |
| "loss": 0.2803, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.767284991568297, |
| "grad_norm": 0.1965628164276994, |
| "learning_rate": 4.309806371018114e-06, |
| "loss": 0.277, |
| "step": 1641 |
| }, |
| { |
| "epoch": 2.768971332209106, |
| "grad_norm": 0.20506125185258076, |
| "learning_rate": 4.2785758900687075e-06, |
| "loss": 0.2916, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.7706576728499157, |
| "grad_norm": 0.21162620440334876, |
| "learning_rate": 4.247345409119301e-06, |
| "loss": 0.2822, |
| "step": 1643 |
| }, |
| { |
| "epoch": 2.7723440134907253, |
| "grad_norm": 0.20567288650070736, |
| "learning_rate": 4.216114928169894e-06, |
| "loss": 0.2851, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.7740303541315345, |
| "grad_norm": 0.20508283384581297, |
| "learning_rate": 4.184884447220487e-06, |
| "loss": 0.2836, |
| "step": 1645 |
| }, |
| { |
| "epoch": 2.775716694772344, |
| "grad_norm": 0.20928732222500257, |
| "learning_rate": 4.153653966271081e-06, |
| "loss": 0.2838, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.7774030354131534, |
| "grad_norm": 0.2109337211436152, |
| "learning_rate": 4.122423485321674e-06, |
| "loss": 0.2804, |
| "step": 1647 |
| }, |
| { |
| "epoch": 2.779089376053963, |
| "grad_norm": 0.2077393204067855, |
| "learning_rate": 4.091193004372267e-06, |
| "loss": 0.2808, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.780775716694772, |
| "grad_norm": 0.21017828523019585, |
| "learning_rate": 4.0599625234228615e-06, |
| "loss": 0.2808, |
| "step": 1649 |
| }, |
| { |
| "epoch": 2.782462057335582, |
| "grad_norm": 0.2031729639742959, |
| "learning_rate": 4.028732042473454e-06, |
| "loss": 0.2732, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.7841483979763915, |
| "grad_norm": 0.21512407032855863, |
| "learning_rate": 3.997501561524048e-06, |
| "loss": 0.2984, |
| "step": 1651 |
| }, |
| { |
| "epoch": 2.7858347386172007, |
| "grad_norm": 0.196729154608354, |
| "learning_rate": 3.966271080574641e-06, |
| "loss": 0.2627, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.78752107925801, |
| "grad_norm": 0.2172289085545206, |
| "learning_rate": 3.935040599625235e-06, |
| "loss": 0.2868, |
| "step": 1653 |
| }, |
| { |
| "epoch": 2.7892074198988195, |
| "grad_norm": 0.1941943020185286, |
| "learning_rate": 3.9038101186758275e-06, |
| "loss": 0.2834, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.790893760539629, |
| "grad_norm": 0.2057212021649519, |
| "learning_rate": 3.872579637726421e-06, |
| "loss": 0.3031, |
| "step": 1655 |
| }, |
| { |
| "epoch": 2.7925801011804383, |
| "grad_norm": 0.19922608550472234, |
| "learning_rate": 3.841349156777015e-06, |
| "loss": 0.2669, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.794266441821248, |
| "grad_norm": 0.2022375243308089, |
| "learning_rate": 3.8101186758276078e-06, |
| "loss": 0.2904, |
| "step": 1657 |
| }, |
| { |
| "epoch": 2.7959527824620576, |
| "grad_norm": 0.22406966976385426, |
| "learning_rate": 3.7788881948782017e-06, |
| "loss": 0.3053, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.7976391231028668, |
| "grad_norm": 0.19858480327231595, |
| "learning_rate": 3.747657713928795e-06, |
| "loss": 0.2818, |
| "step": 1659 |
| }, |
| { |
| "epoch": 2.799325463743676, |
| "grad_norm": 0.20163790196870526, |
| "learning_rate": 3.716427232979388e-06, |
| "loss": 0.2957, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.8010118043844856, |
| "grad_norm": 0.2228194086576066, |
| "learning_rate": 3.685196752029981e-06, |
| "loss": 0.2902, |
| "step": 1661 |
| }, |
| { |
| "epoch": 2.8026981450252952, |
| "grad_norm": 0.20667251938872913, |
| "learning_rate": 3.653966271080575e-06, |
| "loss": 0.2669, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.8043844856661044, |
| "grad_norm": 0.20317918546557187, |
| "learning_rate": 3.6227357901311682e-06, |
| "loss": 0.2842, |
| "step": 1663 |
| }, |
| { |
| "epoch": 2.806070826306914, |
| "grad_norm": 0.2013232009325853, |
| "learning_rate": 3.5915053091817613e-06, |
| "loss": 0.2836, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.8077571669477237, |
| "grad_norm": 0.2124212170531947, |
| "learning_rate": 3.5602748282323553e-06, |
| "loss": 0.283, |
| "step": 1665 |
| }, |
| { |
| "epoch": 2.809443507588533, |
| "grad_norm": 0.20253081915800264, |
| "learning_rate": 3.5290443472829484e-06, |
| "loss": 0.2853, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.811129848229342, |
| "grad_norm": 0.20809367640615845, |
| "learning_rate": 3.4978138663335416e-06, |
| "loss": 0.2754, |
| "step": 1667 |
| }, |
| { |
| "epoch": 2.8128161888701517, |
| "grad_norm": 0.2889121452301856, |
| "learning_rate": 3.4665833853841355e-06, |
| "loss": 0.2884, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.8145025295109614, |
| "grad_norm": 0.20087128908636068, |
| "learning_rate": 3.4353529044347287e-06, |
| "loss": 0.281, |
| "step": 1669 |
| }, |
| { |
| "epoch": 2.8161888701517706, |
| "grad_norm": 0.2074070673281996, |
| "learning_rate": 3.4041224234853218e-06, |
| "loss": 0.2847, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.81787521079258, |
| "grad_norm": 0.19690785352695042, |
| "learning_rate": 3.3728919425359153e-06, |
| "loss": 0.2793, |
| "step": 1671 |
| }, |
| { |
| "epoch": 2.8195615514333894, |
| "grad_norm": 0.19009121705370408, |
| "learning_rate": 3.341661461586509e-06, |
| "loss": 0.2669, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.821247892074199, |
| "grad_norm": 0.19098650836641, |
| "learning_rate": 3.310430980637102e-06, |
| "loss": 0.2743, |
| "step": 1673 |
| }, |
| { |
| "epoch": 2.822934232715008, |
| "grad_norm": 0.2028744829991129, |
| "learning_rate": 3.279200499687695e-06, |
| "loss": 0.2761, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.824620573355818, |
| "grad_norm": 0.2044257995581774, |
| "learning_rate": 3.2479700187382887e-06, |
| "loss": 0.2946, |
| "step": 1675 |
| }, |
| { |
| "epoch": 2.8263069139966275, |
| "grad_norm": 0.20051532112544362, |
| "learning_rate": 3.2167395377888822e-06, |
| "loss": 0.2884, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.8279932546374367, |
| "grad_norm": 0.20553754280420816, |
| "learning_rate": 3.1855090568394754e-06, |
| "loss": 0.2886, |
| "step": 1677 |
| }, |
| { |
| "epoch": 2.8296795952782463, |
| "grad_norm": 0.200052112821026, |
| "learning_rate": 3.154278575890069e-06, |
| "loss": 0.283, |
| "step": 1678 |
| }, |
| { |
| "epoch": 2.8313659359190555, |
| "grad_norm": 0.21047714158137912, |
| "learning_rate": 3.123048094940662e-06, |
| "loss": 0.2981, |
| "step": 1679 |
| }, |
| { |
| "epoch": 2.833052276559865, |
| "grad_norm": 0.21403069862712779, |
| "learning_rate": 3.0918176139912556e-06, |
| "loss": 0.2998, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.8347386172006743, |
| "grad_norm": 0.21533734091940915, |
| "learning_rate": 3.0605871330418487e-06, |
| "loss": 0.2925, |
| "step": 1681 |
| }, |
| { |
| "epoch": 2.836424957841484, |
| "grad_norm": 0.19825568331336288, |
| "learning_rate": 3.0293566520924423e-06, |
| "loss": 0.2739, |
| "step": 1682 |
| }, |
| { |
| "epoch": 2.8381112984822936, |
| "grad_norm": 0.19901702391809034, |
| "learning_rate": 2.998126171143036e-06, |
| "loss": 0.2813, |
| "step": 1683 |
| }, |
| { |
| "epoch": 2.839797639123103, |
| "grad_norm": 0.20302512033337275, |
| "learning_rate": 2.966895690193629e-06, |
| "loss": 0.2903, |
| "step": 1684 |
| }, |
| { |
| "epoch": 2.8414839797639124, |
| "grad_norm": 0.20344695311378921, |
| "learning_rate": 2.9356652092442225e-06, |
| "loss": 0.2932, |
| "step": 1685 |
| }, |
| { |
| "epoch": 2.8431703204047216, |
| "grad_norm": 0.19426605288957086, |
| "learning_rate": 2.9044347282948156e-06, |
| "loss": 0.2922, |
| "step": 1686 |
| }, |
| { |
| "epoch": 2.8448566610455313, |
| "grad_norm": 0.19876694012610552, |
| "learning_rate": 2.873204247345409e-06, |
| "loss": 0.2938, |
| "step": 1687 |
| }, |
| { |
| "epoch": 2.8465430016863404, |
| "grad_norm": 0.2138763936205647, |
| "learning_rate": 2.8419737663960027e-06, |
| "loss": 0.2792, |
| "step": 1688 |
| }, |
| { |
| "epoch": 2.84822934232715, |
| "grad_norm": 0.21020119549110686, |
| "learning_rate": 2.810743285446596e-06, |
| "loss": 0.2854, |
| "step": 1689 |
| }, |
| { |
| "epoch": 2.8499156829679597, |
| "grad_norm": 0.20917019637007714, |
| "learning_rate": 2.7795128044971894e-06, |
| "loss": 0.2742, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.851602023608769, |
| "grad_norm": 0.20234378390911384, |
| "learning_rate": 2.7482823235477825e-06, |
| "loss": 0.286, |
| "step": 1691 |
| }, |
| { |
| "epoch": 2.8532883642495785, |
| "grad_norm": 0.20046123812374947, |
| "learning_rate": 2.717051842598376e-06, |
| "loss": 0.2772, |
| "step": 1692 |
| }, |
| { |
| "epoch": 2.8549747048903877, |
| "grad_norm": 0.2172720515949868, |
| "learning_rate": 2.6858213616489696e-06, |
| "loss": 0.2839, |
| "step": 1693 |
| }, |
| { |
| "epoch": 2.8566610455311974, |
| "grad_norm": 0.19873193377249784, |
| "learning_rate": 2.6545908806995627e-06, |
| "loss": 0.2884, |
| "step": 1694 |
| }, |
| { |
| "epoch": 2.8583473861720066, |
| "grad_norm": 0.20156031983168393, |
| "learning_rate": 2.6233603997501563e-06, |
| "loss": 0.2889, |
| "step": 1695 |
| }, |
| { |
| "epoch": 2.860033726812816, |
| "grad_norm": 0.2046721172478191, |
| "learning_rate": 2.59212991880075e-06, |
| "loss": 0.2886, |
| "step": 1696 |
| }, |
| { |
| "epoch": 2.861720067453626, |
| "grad_norm": 0.196703772022903, |
| "learning_rate": 2.560899437851343e-06, |
| "loss": 0.2701, |
| "step": 1697 |
| }, |
| { |
| "epoch": 2.863406408094435, |
| "grad_norm": 0.19845494425958038, |
| "learning_rate": 2.5296689569019365e-06, |
| "loss": 0.2846, |
| "step": 1698 |
| }, |
| { |
| "epoch": 2.8650927487352447, |
| "grad_norm": 0.19204603536346423, |
| "learning_rate": 2.4984384759525296e-06, |
| "loss": 0.2989, |
| "step": 1699 |
| }, |
| { |
| "epoch": 2.866779089376054, |
| "grad_norm": 0.19912137719570142, |
| "learning_rate": 2.467207995003123e-06, |
| "loss": 0.2735, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.8684654300168635, |
| "grad_norm": 0.1975812173384509, |
| "learning_rate": 2.4359775140537167e-06, |
| "loss": 0.2865, |
| "step": 1701 |
| }, |
| { |
| "epoch": 2.8701517706576727, |
| "grad_norm": 0.21832969792375734, |
| "learning_rate": 2.40474703310431e-06, |
| "loss": 0.2912, |
| "step": 1702 |
| }, |
| { |
| "epoch": 2.8718381112984823, |
| "grad_norm": 0.19774318843932107, |
| "learning_rate": 2.3735165521549034e-06, |
| "loss": 0.288, |
| "step": 1703 |
| }, |
| { |
| "epoch": 2.873524451939292, |
| "grad_norm": 0.20387387288215883, |
| "learning_rate": 2.342286071205497e-06, |
| "loss": 0.2705, |
| "step": 1704 |
| }, |
| { |
| "epoch": 2.875210792580101, |
| "grad_norm": 0.19751525186138755, |
| "learning_rate": 2.31105559025609e-06, |
| "loss": 0.2948, |
| "step": 1705 |
| }, |
| { |
| "epoch": 2.876897133220911, |
| "grad_norm": 0.18478364522654045, |
| "learning_rate": 2.2798251093066836e-06, |
| "loss": 0.2923, |
| "step": 1706 |
| }, |
| { |
| "epoch": 2.87858347386172, |
| "grad_norm": 0.21012748920962054, |
| "learning_rate": 2.2485946283572767e-06, |
| "loss": 0.2694, |
| "step": 1707 |
| }, |
| { |
| "epoch": 2.8802698145025296, |
| "grad_norm": 0.19216064779198222, |
| "learning_rate": 2.2173641474078703e-06, |
| "loss": 0.2814, |
| "step": 1708 |
| }, |
| { |
| "epoch": 2.881956155143339, |
| "grad_norm": 0.2192228501690963, |
| "learning_rate": 2.186133666458464e-06, |
| "loss": 0.2883, |
| "step": 1709 |
| }, |
| { |
| "epoch": 2.8836424957841484, |
| "grad_norm": 0.19038955794138343, |
| "learning_rate": 2.154903185509057e-06, |
| "loss": 0.2774, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.885328836424958, |
| "grad_norm": 0.21003488814268553, |
| "learning_rate": 2.1236727045596505e-06, |
| "loss": 0.2795, |
| "step": 1711 |
| }, |
| { |
| "epoch": 2.8870151770657673, |
| "grad_norm": 0.19468953854662951, |
| "learning_rate": 2.0924422236102436e-06, |
| "loss": 0.2716, |
| "step": 1712 |
| }, |
| { |
| "epoch": 2.8887015177065765, |
| "grad_norm": 0.38264106870328357, |
| "learning_rate": 2.061211742660837e-06, |
| "loss": 0.2843, |
| "step": 1713 |
| }, |
| { |
| "epoch": 2.890387858347386, |
| "grad_norm": 0.2055405553127052, |
| "learning_rate": 2.0299812617114307e-06, |
| "loss": 0.2868, |
| "step": 1714 |
| }, |
| { |
| "epoch": 2.8920741989881957, |
| "grad_norm": 0.18670256654302092, |
| "learning_rate": 1.998750780762024e-06, |
| "loss": 0.2824, |
| "step": 1715 |
| }, |
| { |
| "epoch": 2.893760539629005, |
| "grad_norm": 0.1935547163870898, |
| "learning_rate": 1.9675202998126174e-06, |
| "loss": 0.2808, |
| "step": 1716 |
| }, |
| { |
| "epoch": 2.8954468802698146, |
| "grad_norm": 0.1988377026687388, |
| "learning_rate": 1.9362898188632105e-06, |
| "loss": 0.2891, |
| "step": 1717 |
| }, |
| { |
| "epoch": 2.897133220910624, |
| "grad_norm": 0.18682277679930157, |
| "learning_rate": 1.9050593379138039e-06, |
| "loss": 0.2839, |
| "step": 1718 |
| }, |
| { |
| "epoch": 2.8988195615514334, |
| "grad_norm": 0.1883528830947179, |
| "learning_rate": 1.8738288569643974e-06, |
| "loss": 0.2812, |
| "step": 1719 |
| }, |
| { |
| "epoch": 2.9005059021922426, |
| "grad_norm": 0.18763676290215717, |
| "learning_rate": 1.8425983760149906e-06, |
| "loss": 0.2931, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.902192242833052, |
| "grad_norm": 0.20121785426301733, |
| "learning_rate": 1.8113678950655841e-06, |
| "loss": 0.2856, |
| "step": 1721 |
| }, |
| { |
| "epoch": 2.903878583473862, |
| "grad_norm": 0.1869843292530543, |
| "learning_rate": 1.7801374141161777e-06, |
| "loss": 0.2791, |
| "step": 1722 |
| }, |
| { |
| "epoch": 2.905564924114671, |
| "grad_norm": 0.1900538398572289, |
| "learning_rate": 1.7489069331667708e-06, |
| "loss": 0.2781, |
| "step": 1723 |
| }, |
| { |
| "epoch": 2.9072512647554807, |
| "grad_norm": 0.20594265363597425, |
| "learning_rate": 1.7176764522173643e-06, |
| "loss": 0.2796, |
| "step": 1724 |
| }, |
| { |
| "epoch": 2.9089376053962903, |
| "grad_norm": 0.20817463477274487, |
| "learning_rate": 1.6864459712679577e-06, |
| "loss": 0.3059, |
| "step": 1725 |
| }, |
| { |
| "epoch": 2.9106239460370995, |
| "grad_norm": 0.18804462500132332, |
| "learning_rate": 1.655215490318551e-06, |
| "loss": 0.2764, |
| "step": 1726 |
| }, |
| { |
| "epoch": 2.9123102866779087, |
| "grad_norm": 0.2045067376777732, |
| "learning_rate": 1.6239850093691443e-06, |
| "loss": 0.292, |
| "step": 1727 |
| }, |
| { |
| "epoch": 2.9139966273187183, |
| "grad_norm": 0.19175710659912157, |
| "learning_rate": 1.5927545284197377e-06, |
| "loss": 0.2855, |
| "step": 1728 |
| }, |
| { |
| "epoch": 2.915682967959528, |
| "grad_norm": 0.1988671701778532, |
| "learning_rate": 1.561524047470331e-06, |
| "loss": 0.2624, |
| "step": 1729 |
| }, |
| { |
| "epoch": 2.917369308600337, |
| "grad_norm": 0.20083443279579477, |
| "learning_rate": 1.5302935665209244e-06, |
| "loss": 0.2729, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.919055649241147, |
| "grad_norm": 0.18471159242802662, |
| "learning_rate": 1.499063085571518e-06, |
| "loss": 0.2724, |
| "step": 1731 |
| }, |
| { |
| "epoch": 2.920741989881956, |
| "grad_norm": 0.21068581000467754, |
| "learning_rate": 1.4678326046221112e-06, |
| "loss": 0.2801, |
| "step": 1732 |
| }, |
| { |
| "epoch": 2.9224283305227656, |
| "grad_norm": 0.1891544535796431, |
| "learning_rate": 1.4366021236727046e-06, |
| "loss": 0.2805, |
| "step": 1733 |
| }, |
| { |
| "epoch": 2.924114671163575, |
| "grad_norm": 0.20313222325906466, |
| "learning_rate": 1.405371642723298e-06, |
| "loss": 0.2871, |
| "step": 1734 |
| }, |
| { |
| "epoch": 2.9258010118043845, |
| "grad_norm": 0.20104211480825318, |
| "learning_rate": 1.3741411617738913e-06, |
| "loss": 0.3053, |
| "step": 1735 |
| }, |
| { |
| "epoch": 2.927487352445194, |
| "grad_norm": 0.19176846135912543, |
| "learning_rate": 1.3429106808244848e-06, |
| "loss": 0.2769, |
| "step": 1736 |
| }, |
| { |
| "epoch": 2.9291736930860033, |
| "grad_norm": 0.20721625332896973, |
| "learning_rate": 1.3116801998750781e-06, |
| "loss": 0.2887, |
| "step": 1737 |
| }, |
| { |
| "epoch": 2.930860033726813, |
| "grad_norm": 0.1948545750930062, |
| "learning_rate": 1.2804497189256715e-06, |
| "loss": 0.2911, |
| "step": 1738 |
| }, |
| { |
| "epoch": 2.932546374367622, |
| "grad_norm": 0.193561948006962, |
| "learning_rate": 1.2492192379762648e-06, |
| "loss": 0.2843, |
| "step": 1739 |
| }, |
| { |
| "epoch": 2.9342327150084317, |
| "grad_norm": 0.20976429959094334, |
| "learning_rate": 1.2179887570268584e-06, |
| "loss": 0.312, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.935919055649241, |
| "grad_norm": 0.20371403034140675, |
| "learning_rate": 1.1867582760774517e-06, |
| "loss": 0.2723, |
| "step": 1741 |
| }, |
| { |
| "epoch": 2.9376053962900506, |
| "grad_norm": 0.19139763737062507, |
| "learning_rate": 1.155527795128045e-06, |
| "loss": 0.3093, |
| "step": 1742 |
| }, |
| { |
| "epoch": 2.93929173693086, |
| "grad_norm": 0.20807080717830986, |
| "learning_rate": 1.1242973141786384e-06, |
| "loss": 0.3053, |
| "step": 1743 |
| }, |
| { |
| "epoch": 2.9409780775716694, |
| "grad_norm": 0.1851309824145173, |
| "learning_rate": 1.093066833229232e-06, |
| "loss": 0.2622, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.942664418212479, |
| "grad_norm": 0.18687903030152567, |
| "learning_rate": 1.0618363522798253e-06, |
| "loss": 0.2799, |
| "step": 1745 |
| }, |
| { |
| "epoch": 2.9443507588532882, |
| "grad_norm": 0.20483755437028875, |
| "learning_rate": 1.0306058713304186e-06, |
| "loss": 0.2949, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.946037099494098, |
| "grad_norm": 0.1977892003377331, |
| "learning_rate": 9.99375390381012e-07, |
| "loss": 0.2655, |
| "step": 1747 |
| }, |
| { |
| "epoch": 2.947723440134907, |
| "grad_norm": 0.18822466128381724, |
| "learning_rate": 9.681449094316053e-07, |
| "loss": 0.2745, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.9494097807757167, |
| "grad_norm": 0.19647124454247075, |
| "learning_rate": 9.369144284821987e-07, |
| "loss": 0.2848, |
| "step": 1749 |
| }, |
| { |
| "epoch": 2.9510961214165263, |
| "grad_norm": 0.199156961501018, |
| "learning_rate": 9.056839475327921e-07, |
| "loss": 0.3077, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.9527824620573355, |
| "grad_norm": 0.19076095952356142, |
| "learning_rate": 8.744534665833854e-07, |
| "loss": 0.2842, |
| "step": 1751 |
| }, |
| { |
| "epoch": 2.954468802698145, |
| "grad_norm": 0.19620460710416912, |
| "learning_rate": 8.432229856339788e-07, |
| "loss": 0.2827, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.9561551433389543, |
| "grad_norm": 0.18526171113725295, |
| "learning_rate": 8.119925046845722e-07, |
| "loss": 0.2806, |
| "step": 1753 |
| }, |
| { |
| "epoch": 2.957841483979764, |
| "grad_norm": 0.19345290483620012, |
| "learning_rate": 7.807620237351655e-07, |
| "loss": 0.2806, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.959527824620573, |
| "grad_norm": 0.20273068156401156, |
| "learning_rate": 7.49531542785759e-07, |
| "loss": 0.2929, |
| "step": 1755 |
| }, |
| { |
| "epoch": 2.961214165261383, |
| "grad_norm": 0.18398648217481314, |
| "learning_rate": 7.183010618363523e-07, |
| "loss": 0.289, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.9629005059021924, |
| "grad_norm": 0.18672180586987563, |
| "learning_rate": 6.870705808869456e-07, |
| "loss": 0.2694, |
| "step": 1757 |
| }, |
| { |
| "epoch": 2.9645868465430016, |
| "grad_norm": 0.1854297802284763, |
| "learning_rate": 6.558400999375391e-07, |
| "loss": 0.2744, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.9662731871838113, |
| "grad_norm": 0.18542133331185148, |
| "learning_rate": 6.246096189881324e-07, |
| "loss": 0.2921, |
| "step": 1759 |
| }, |
| { |
| "epoch": 2.9679595278246205, |
| "grad_norm": 0.19005635454359576, |
| "learning_rate": 5.933791380387259e-07, |
| "loss": 0.2886, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.96964586846543, |
| "grad_norm": 0.20487939341293834, |
| "learning_rate": 5.621486570893192e-07, |
| "loss": 0.2869, |
| "step": 1761 |
| }, |
| { |
| "epoch": 2.9713322091062393, |
| "grad_norm": 0.19380856113326692, |
| "learning_rate": 5.309181761399126e-07, |
| "loss": 0.2775, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.973018549747049, |
| "grad_norm": 0.1876885533427002, |
| "learning_rate": 4.99687695190506e-07, |
| "loss": 0.2705, |
| "step": 1763 |
| }, |
| { |
| "epoch": 2.9747048903878586, |
| "grad_norm": 0.19523022001958698, |
| "learning_rate": 4.6845721424109936e-07, |
| "loss": 0.2952, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.9763912310286678, |
| "grad_norm": 0.20164553295789006, |
| "learning_rate": 4.372267332916927e-07, |
| "loss": 0.2756, |
| "step": 1765 |
| }, |
| { |
| "epoch": 2.9780775716694774, |
| "grad_norm": 0.19436202075185965, |
| "learning_rate": 4.059962523422861e-07, |
| "loss": 0.2936, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.9797639123102866, |
| "grad_norm": 0.19528563314857372, |
| "learning_rate": 3.747657713928795e-07, |
| "loss": 0.2853, |
| "step": 1767 |
| }, |
| { |
| "epoch": 2.9814502529510962, |
| "grad_norm": 0.19637726757360177, |
| "learning_rate": 3.435352904434728e-07, |
| "loss": 0.2851, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.9831365935919054, |
| "grad_norm": 0.1926761803451411, |
| "learning_rate": 3.123048094940662e-07, |
| "loss": 0.2788, |
| "step": 1769 |
| }, |
| { |
| "epoch": 2.984822934232715, |
| "grad_norm": 0.19138794357950703, |
| "learning_rate": 2.810743285446596e-07, |
| "loss": 0.2755, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.9865092748735247, |
| "grad_norm": 0.19225770047830543, |
| "learning_rate": 2.49843847595253e-07, |
| "loss": 0.2868, |
| "step": 1771 |
| }, |
| { |
| "epoch": 2.988195615514334, |
| "grad_norm": 0.595894216120004, |
| "learning_rate": 2.1861336664584635e-07, |
| "loss": 0.2958, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.989881956155143, |
| "grad_norm": 0.18608324192924283, |
| "learning_rate": 1.8738288569643974e-07, |
| "loss": 0.277, |
| "step": 1773 |
| }, |
| { |
| "epoch": 2.9915682967959527, |
| "grad_norm": 0.1873496121673982, |
| "learning_rate": 1.561524047470331e-07, |
| "loss": 0.2755, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.9932546374367623, |
| "grad_norm": 0.19767801151433204, |
| "learning_rate": 1.249219237976265e-07, |
| "loss": 0.2727, |
| "step": 1775 |
| }, |
| { |
| "epoch": 2.9949409780775715, |
| "grad_norm": 0.19574725490820036, |
| "learning_rate": 9.369144284821987e-08, |
| "loss": 0.2865, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.996627318718381, |
| "grad_norm": 0.21185109566749932, |
| "learning_rate": 6.246096189881325e-08, |
| "loss": 0.2858, |
| "step": 1777 |
| }, |
| { |
| "epoch": 2.998313659359191, |
| "grad_norm": 0.1893865756399657, |
| "learning_rate": 3.123048094940662e-08, |
| "loss": 0.2848, |
| "step": 1778 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.1750454809313914, |
| "learning_rate": 0.0, |
| "loss": 0.2613, |
| "step": 1779 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1779, |
| "total_flos": 1.5197438984385987e+18, |
| "train_loss": 0.44912863300381384, |
| "train_runtime": 103340.3025, |
| "train_samples_per_second": 0.275, |
| "train_steps_per_second": 0.017 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1779, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5197438984385987e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |