{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 14702, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006802142674942607, "grad_norm": 0.24588888883590698, "learning_rate": 2.445652173913044e-07, "loss": 0.7888, "step": 10 }, { "epoch": 0.0013604285349885215, "grad_norm": 0.2492022067308426, "learning_rate": 5.16304347826087e-07, "loss": 0.7899, "step": 20 }, { "epoch": 0.002040642802482782, "grad_norm": 0.24171292781829834, "learning_rate": 7.880434782608697e-07, "loss": 0.7777, "step": 30 }, { "epoch": 0.002720857069977043, "grad_norm": 0.24095626175403595, "learning_rate": 1.0597826086956523e-06, "loss": 0.7771, "step": 40 }, { "epoch": 0.0034010713374713034, "grad_norm": 0.2470710128545761, "learning_rate": 1.3315217391304349e-06, "loss": 0.7868, "step": 50 }, { "epoch": 0.004081285604965564, "grad_norm": 0.23980823159217834, "learning_rate": 1.6032608695652175e-06, "loss": 0.7888, "step": 60 }, { "epoch": 0.004761499872459825, "grad_norm": 0.24573369324207306, "learning_rate": 1.8750000000000003e-06, "loss": 0.7713, "step": 70 }, { "epoch": 0.005441714139954086, "grad_norm": 0.2483571171760559, "learning_rate": 2.146739130434783e-06, "loss": 0.7766, "step": 80 }, { "epoch": 0.006121928407448346, "grad_norm": 0.24448741972446442, "learning_rate": 2.4184782608695654e-06, "loss": 0.7937, "step": 90 }, { "epoch": 0.006802142674942607, "grad_norm": 0.2447367161512375, "learning_rate": 2.6902173913043476e-06, "loss": 0.7786, "step": 100 }, { "epoch": 0.007482356942436868, "grad_norm": 0.24462351202964783, "learning_rate": 2.9619565217391306e-06, "loss": 0.7701, "step": 110 }, { "epoch": 0.008162571209931128, "grad_norm": 0.23651371896266937, "learning_rate": 3.233695652173913e-06, "loss": 0.7855, "step": 120 }, { "epoch": 0.00884278547742539, "grad_norm": 0.25307324528694153, "learning_rate": 3.5054347826086958e-06, "loss": 0.776, "step": 130 }, { "epoch": 0.00952299974491965, "grad_norm": 0.24043457210063934, "learning_rate": 3.7771739130434788e-06, "loss": 0.7789, "step": 140 }, { "epoch": 0.01020321401241391, "grad_norm": 0.2457265853881836, "learning_rate": 4.048913043478261e-06, "loss": 0.7744, "step": 150 }, { "epoch": 0.010883428279908172, "grad_norm": 0.24025650322437286, "learning_rate": 4.320652173913044e-06, "loss": 0.7749, "step": 160 }, { "epoch": 0.011563642547402431, "grad_norm": 0.24393519759178162, "learning_rate": 4.5923913043478265e-06, "loss": 0.779, "step": 170 }, { "epoch": 0.012243856814896693, "grad_norm": 0.24013830721378326, "learning_rate": 4.864130434782609e-06, "loss": 0.7802, "step": 180 }, { "epoch": 0.012924071082390954, "grad_norm": 0.24367555975914001, "learning_rate": 5.135869565217392e-06, "loss": 0.7831, "step": 190 }, { "epoch": 0.013604285349885213, "grad_norm": 0.24819183349609375, "learning_rate": 5.407608695652174e-06, "loss": 0.7996, "step": 200 }, { "epoch": 0.014284499617379475, "grad_norm": 0.24860823154449463, "learning_rate": 5.679347826086957e-06, "loss": 0.799, "step": 210 }, { "epoch": 0.014964713884873736, "grad_norm": 0.2384982705116272, "learning_rate": 5.95108695652174e-06, "loss": 0.7795, "step": 220 }, { "epoch": 0.015644928152367996, "grad_norm": 0.25637149810791016, "learning_rate": 6.222826086956523e-06, "loss": 0.7843, "step": 230 }, { "epoch": 0.016325142419862257, "grad_norm": 0.24896763265132904, "learning_rate": 6.4945652173913055e-06, "loss": 0.7863, "step": 240 }, { "epoch": 0.017005356687356518, "grad_norm": 0.24670195579528809, "learning_rate": 6.766304347826087e-06, "loss": 0.7873, "step": 250 }, { "epoch": 0.01768557095485078, "grad_norm": 0.2428620159626007, "learning_rate": 7.03804347826087e-06, "loss": 0.7837, "step": 260 }, { "epoch": 0.018365785222345037, "grad_norm": 0.25439387559890747, "learning_rate": 7.309782608695652e-06, "loss": 0.7803, "step": 270 }, { "epoch": 0.0190459994898393, "grad_norm": 0.2559449076652527, "learning_rate": 7.581521739130435e-06, "loss": 0.7955, "step": 280 }, { "epoch": 0.01972621375733356, "grad_norm": 0.26467257738113403, "learning_rate": 7.853260869565218e-06, "loss": 0.7957, "step": 290 }, { "epoch": 0.02040642802482782, "grad_norm": 0.246280238032341, "learning_rate": 8.125000000000001e-06, "loss": 0.8032, "step": 300 }, { "epoch": 0.021086642292322082, "grad_norm": 0.2527225911617279, "learning_rate": 8.396739130434784e-06, "loss": 0.7802, "step": 310 }, { "epoch": 0.021766856559816344, "grad_norm": 0.2507527768611908, "learning_rate": 8.668478260869566e-06, "loss": 0.7968, "step": 320 }, { "epoch": 0.0224470708273106, "grad_norm": 0.25376781821250916, "learning_rate": 8.940217391304349e-06, "loss": 0.7903, "step": 330 }, { "epoch": 0.023127285094804863, "grad_norm": 0.24194824695587158, "learning_rate": 9.211956521739131e-06, "loss": 0.7857, "step": 340 }, { "epoch": 0.023807499362299124, "grad_norm": 0.25317472219467163, "learning_rate": 9.483695652173914e-06, "loss": 0.7788, "step": 350 }, { "epoch": 0.024487713629793385, "grad_norm": 0.26361075043678284, "learning_rate": 9.755434782608696e-06, "loss": 0.7841, "step": 360 }, { "epoch": 0.025167927897287647, "grad_norm": 0.2593362629413605, "learning_rate": 1.0027173913043479e-05, "loss": 0.7865, "step": 370 }, { "epoch": 0.025848142164781908, "grad_norm": 0.25769373774528503, "learning_rate": 1.0298913043478262e-05, "loss": 0.7821, "step": 380 }, { "epoch": 0.026528356432276166, "grad_norm": 0.25864651799201965, "learning_rate": 1.0570652173913046e-05, "loss": 0.7804, "step": 390 }, { "epoch": 0.027208570699770427, "grad_norm": 0.25476300716400146, "learning_rate": 1.0842391304347829e-05, "loss": 0.7827, "step": 400 }, { "epoch": 0.02788878496726469, "grad_norm": 0.2638179659843445, "learning_rate": 1.1114130434782611e-05, "loss": 0.7969, "step": 410 }, { "epoch": 0.02856899923475895, "grad_norm": 0.25068527460098267, "learning_rate": 1.1385869565217392e-05, "loss": 0.7707, "step": 420 }, { "epoch": 0.02924921350225321, "grad_norm": 0.2590930759906769, "learning_rate": 1.1657608695652175e-05, "loss": 0.7878, "step": 430 }, { "epoch": 0.029929427769747472, "grad_norm": 0.2491404116153717, "learning_rate": 1.1929347826086957e-05, "loss": 0.7877, "step": 440 }, { "epoch": 0.03060964203724173, "grad_norm": 0.2550014555454254, "learning_rate": 1.220108695652174e-05, "loss": 0.7755, "step": 450 }, { "epoch": 0.03128985630473599, "grad_norm": 0.25757303833961487, "learning_rate": 1.2472826086956522e-05, "loss": 0.7965, "step": 460 }, { "epoch": 0.03197007057223025, "grad_norm": 0.24262848496437073, "learning_rate": 1.2744565217391305e-05, "loss": 0.7695, "step": 470 }, { "epoch": 0.032650284839724514, "grad_norm": 0.2601710557937622, "learning_rate": 1.3016304347826088e-05, "loss": 0.795, "step": 480 }, { "epoch": 0.033330499107218775, "grad_norm": 0.2602006196975708, "learning_rate": 1.328804347826087e-05, "loss": 0.7838, "step": 490 }, { "epoch": 0.034010713374713036, "grad_norm": 0.2601083517074585, "learning_rate": 1.3559782608695653e-05, "loss": 0.7887, "step": 500 }, { "epoch": 0.0346909276422073, "grad_norm": 0.403003454208374, "learning_rate": 1.3831521739130435e-05, "loss": 0.7821, "step": 510 }, { "epoch": 0.03537114190970156, "grad_norm": 0.2679433822631836, "learning_rate": 1.4103260869565218e-05, "loss": 0.7813, "step": 520 }, { "epoch": 0.03605135617719581, "grad_norm": 0.25827887654304504, "learning_rate": 1.4375e-05, "loss": 0.7897, "step": 530 }, { "epoch": 0.036731570444690075, "grad_norm": 0.26387709379196167, "learning_rate": 1.4646739130434785e-05, "loss": 0.7902, "step": 540 }, { "epoch": 0.037411784712184336, "grad_norm": 0.25550195574760437, "learning_rate": 1.4918478260869567e-05, "loss": 0.7853, "step": 550 }, { "epoch": 0.0380919989796786, "grad_norm": 0.2730271518230438, "learning_rate": 1.519021739130435e-05, "loss": 0.8054, "step": 560 }, { "epoch": 0.03877221324717286, "grad_norm": 0.27456948161125183, "learning_rate": 1.546195652173913e-05, "loss": 0.7876, "step": 570 }, { "epoch": 0.03945242751466712, "grad_norm": 0.27266284823417664, "learning_rate": 1.5733695652173913e-05, "loss": 0.792, "step": 580 }, { "epoch": 0.04013264178216138, "grad_norm": 0.2583005726337433, "learning_rate": 1.6005434782608696e-05, "loss": 0.7855, "step": 590 }, { "epoch": 0.04081285604965564, "grad_norm": 0.2712930738925934, "learning_rate": 1.6277173913043482e-05, "loss": 0.7986, "step": 600 }, { "epoch": 0.041493070317149904, "grad_norm": 0.2771306335926056, "learning_rate": 1.6548913043478265e-05, "loss": 0.7951, "step": 610 }, { "epoch": 0.042173284584644165, "grad_norm": 0.2726171016693115, "learning_rate": 1.6820652173913047e-05, "loss": 0.7881, "step": 620 }, { "epoch": 0.042853498852138426, "grad_norm": 0.26444506645202637, "learning_rate": 1.709239130434783e-05, "loss": 0.7987, "step": 630 }, { "epoch": 0.04353371311963269, "grad_norm": 0.28685182332992554, "learning_rate": 1.7364130434782612e-05, "loss": 0.8071, "step": 640 }, { "epoch": 0.04421392738712694, "grad_norm": 0.2572171092033386, "learning_rate": 1.763586956521739e-05, "loss": 0.785, "step": 650 }, { "epoch": 0.0448941416546212, "grad_norm": 0.2599855065345764, "learning_rate": 1.7907608695652174e-05, "loss": 0.7958, "step": 660 }, { "epoch": 0.045574355922115464, "grad_norm": 0.2666078507900238, "learning_rate": 1.8179347826086957e-05, "loss": 0.7855, "step": 670 }, { "epoch": 0.046254570189609726, "grad_norm": 0.2504750192165375, "learning_rate": 1.845108695652174e-05, "loss": 0.7808, "step": 680 }, { "epoch": 0.04693478445710399, "grad_norm": 0.265756219625473, "learning_rate": 1.8722826086956522e-05, "loss": 0.8039, "step": 690 }, { "epoch": 0.04761499872459825, "grad_norm": 0.24935463070869446, "learning_rate": 1.8994565217391304e-05, "loss": 0.8042, "step": 700 }, { "epoch": 0.04829521299209251, "grad_norm": 0.2691420018672943, "learning_rate": 1.9266304347826087e-05, "loss": 0.79, "step": 710 }, { "epoch": 0.04897542725958677, "grad_norm": 0.2798922657966614, "learning_rate": 1.953804347826087e-05, "loss": 0.7957, "step": 720 }, { "epoch": 0.04965564152708103, "grad_norm": 0.2665148973464966, "learning_rate": 1.9809782608695652e-05, "loss": 0.7775, "step": 730 }, { "epoch": 0.05033585579457529, "grad_norm": 0.2752082347869873, "learning_rate": 1.999999772297307e-05, "loss": 0.7848, "step": 740 }, { "epoch": 0.051016070062069555, "grad_norm": 0.25373271107673645, "learning_rate": 1.9999957242523135e-05, "loss": 0.7894, "step": 750 }, { "epoch": 0.051696284329563816, "grad_norm": 0.27112025022506714, "learning_rate": 1.9999866161710487e-05, "loss": 0.7857, "step": 760 }, { "epoch": 0.05237649859705807, "grad_norm": 0.255633145570755, "learning_rate": 1.999972448099601e-05, "loss": 0.7895, "step": 770 }, { "epoch": 0.05305671286455233, "grad_norm": 0.25357896089553833, "learning_rate": 1.9999532201096614e-05, "loss": 0.7817, "step": 780 }, { "epoch": 0.05373692713204659, "grad_norm": 0.2669251263141632, "learning_rate": 1.999928932298524e-05, "loss": 0.8089, "step": 790 }, { "epoch": 0.054417141399540854, "grad_norm": 0.2620493173599243, "learning_rate": 1.9998995847890876e-05, "loss": 0.7836, "step": 800 }, { "epoch": 0.055097355667035115, "grad_norm": 0.25441357493400574, "learning_rate": 1.9998651777298512e-05, "loss": 0.7874, "step": 810 }, { "epoch": 0.05577756993452938, "grad_norm": 0.27190372347831726, "learning_rate": 1.9998257112949167e-05, "loss": 0.7841, "step": 820 }, { "epoch": 0.05645778420202364, "grad_norm": 0.26984357833862305, "learning_rate": 1.9997811856839875e-05, "loss": 0.7866, "step": 830 }, { "epoch": 0.0571379984695179, "grad_norm": 0.26123085618019104, "learning_rate": 1.9997316011223647e-05, "loss": 0.7975, "step": 840 }, { "epoch": 0.05781821273701216, "grad_norm": 0.2544929087162018, "learning_rate": 1.9996769578609494e-05, "loss": 0.7834, "step": 850 }, { "epoch": 0.05849842700450642, "grad_norm": 0.2855475842952728, "learning_rate": 1.99961725617624e-05, "loss": 0.795, "step": 860 }, { "epoch": 0.05917864127200068, "grad_norm": 0.26833781599998474, "learning_rate": 1.99955249637033e-05, "loss": 0.7934, "step": 870 }, { "epoch": 0.059858855539494944, "grad_norm": 0.2639079689979553, "learning_rate": 1.9994826787709082e-05, "loss": 0.7917, "step": 880 }, { "epoch": 0.0605390698069892, "grad_norm": 0.2570595443248749, "learning_rate": 1.9994078037312552e-05, "loss": 0.8031, "step": 890 }, { "epoch": 0.06121928407448346, "grad_norm": 0.26510873436927795, "learning_rate": 1.9993278716302435e-05, "loss": 0.7879, "step": 900 }, { "epoch": 0.06189949834197772, "grad_norm": 0.256365567445755, "learning_rate": 1.999242882872334e-05, "loss": 0.7806, "step": 910 }, { "epoch": 0.06257971260947198, "grad_norm": 0.25432026386260986, "learning_rate": 1.9991528378875747e-05, "loss": 0.79, "step": 920 }, { "epoch": 0.06325992687696624, "grad_norm": 0.25278228521347046, "learning_rate": 1.9990577371315983e-05, "loss": 0.7967, "step": 930 }, { "epoch": 0.0639401411444605, "grad_norm": 0.2596864104270935, "learning_rate": 1.99895758108562e-05, "loss": 0.786, "step": 940 }, { "epoch": 0.06462035541195477, "grad_norm": 0.25254207849502563, "learning_rate": 1.998852370256436e-05, "loss": 0.7775, "step": 950 }, { "epoch": 0.06530056967944903, "grad_norm": 0.2752808630466461, "learning_rate": 1.9987421051764184e-05, "loss": 0.7917, "step": 960 }, { "epoch": 0.06598078394694329, "grad_norm": 0.2659485638141632, "learning_rate": 1.998626786403515e-05, "loss": 0.7712, "step": 970 }, { "epoch": 0.06666099821443755, "grad_norm": 0.26159390807151794, "learning_rate": 1.998506414521246e-05, "loss": 0.8003, "step": 980 }, { "epoch": 0.06734121248193181, "grad_norm": 0.25408387184143066, "learning_rate": 1.9983809901386995e-05, "loss": 0.7882, "step": 990 }, { "epoch": 0.06802142674942607, "grad_norm": 0.2492450475692749, "learning_rate": 1.9982505138905303e-05, "loss": 0.8006, "step": 1000 }, { "epoch": 0.06870164101692033, "grad_norm": 0.26037201285362244, "learning_rate": 1.998114986436956e-05, "loss": 0.7897, "step": 1010 }, { "epoch": 0.0693818552844146, "grad_norm": 0.25039172172546387, "learning_rate": 1.997974408463753e-05, "loss": 0.7966, "step": 1020 }, { "epoch": 0.07006206955190886, "grad_norm": 0.25802841782569885, "learning_rate": 1.9978287806822544e-05, "loss": 0.7908, "step": 1030 }, { "epoch": 0.07074228381940312, "grad_norm": 0.25341302156448364, "learning_rate": 1.9976781038293444e-05, "loss": 0.7826, "step": 1040 }, { "epoch": 0.07142249808689738, "grad_norm": 0.2570743262767792, "learning_rate": 1.9975223786674576e-05, "loss": 0.7891, "step": 1050 }, { "epoch": 0.07210271235439163, "grad_norm": 0.25724539160728455, "learning_rate": 1.997361605984572e-05, "loss": 0.7906, "step": 1060 }, { "epoch": 0.07278292662188589, "grad_norm": 0.2633865773677826, "learning_rate": 1.9971957865942064e-05, "loss": 0.7928, "step": 1070 }, { "epoch": 0.07346314088938015, "grad_norm": 0.2706665098667145, "learning_rate": 1.9970249213354167e-05, "loss": 0.7876, "step": 1080 }, { "epoch": 0.07414335515687441, "grad_norm": 0.2597387135028839, "learning_rate": 1.996849011072791e-05, "loss": 0.7922, "step": 1090 }, { "epoch": 0.07482356942436867, "grad_norm": 0.2626490592956543, "learning_rate": 1.9966680566964456e-05, "loss": 0.7931, "step": 1100 }, { "epoch": 0.07550378369186293, "grad_norm": 0.26137858629226685, "learning_rate": 1.99648205912202e-05, "loss": 0.7881, "step": 1110 }, { "epoch": 0.0761839979593572, "grad_norm": 0.2648197412490845, "learning_rate": 1.9962910192906725e-05, "loss": 0.787, "step": 1120 }, { "epoch": 0.07686421222685146, "grad_norm": 0.25306472182273865, "learning_rate": 1.9960949381690757e-05, "loss": 0.7925, "step": 1130 }, { "epoch": 0.07754442649434572, "grad_norm": 0.2650945782661438, "learning_rate": 1.9958938167494118e-05, "loss": 0.7926, "step": 1140 }, { "epoch": 0.07822464076183998, "grad_norm": 0.24441677331924438, "learning_rate": 1.9956876560493668e-05, "loss": 0.7934, "step": 1150 }, { "epoch": 0.07890485502933424, "grad_norm": 0.25528618693351746, "learning_rate": 1.9954764571121254e-05, "loss": 0.7961, "step": 1160 }, { "epoch": 0.0795850692968285, "grad_norm": 0.2585403025150299, "learning_rate": 1.9952602210063664e-05, "loss": 0.7906, "step": 1170 }, { "epoch": 0.08026528356432276, "grad_norm": 0.26270559430122375, "learning_rate": 1.9950389488262576e-05, "loss": 0.7874, "step": 1180 }, { "epoch": 0.08094549783181702, "grad_norm": 0.259402871131897, "learning_rate": 1.9948126416914486e-05, "loss": 0.7946, "step": 1190 }, { "epoch": 0.08162571209931128, "grad_norm": 0.2569419741630554, "learning_rate": 1.9945813007470668e-05, "loss": 0.789, "step": 1200 }, { "epoch": 0.08230592636680555, "grad_norm": 0.2626119554042816, "learning_rate": 1.9943449271637103e-05, "loss": 0.7779, "step": 1210 }, { "epoch": 0.08298614063429981, "grad_norm": 0.2561872899532318, "learning_rate": 1.994103522137444e-05, "loss": 0.7798, "step": 1220 }, { "epoch": 0.08366635490179407, "grad_norm": 0.24945363402366638, "learning_rate": 1.9938570868897907e-05, "loss": 0.7823, "step": 1230 }, { "epoch": 0.08434656916928833, "grad_norm": 0.2486308068037033, "learning_rate": 1.993605622667727e-05, "loss": 0.7851, "step": 1240 }, { "epoch": 0.08502678343678259, "grad_norm": 0.2569667100906372, "learning_rate": 1.9933491307436768e-05, "loss": 0.7933, "step": 1250 }, { "epoch": 0.08570699770427685, "grad_norm": 0.24834266304969788, "learning_rate": 1.9930876124155037e-05, "loss": 0.796, "step": 1260 }, { "epoch": 0.08638721197177111, "grad_norm": 0.27963775396347046, "learning_rate": 1.9928210690065058e-05, "loss": 0.7944, "step": 1270 }, { "epoch": 0.08706742623926537, "grad_norm": 0.2627074718475342, "learning_rate": 1.992549501865408e-05, "loss": 0.7995, "step": 1280 }, { "epoch": 0.08774764050675964, "grad_norm": 0.25970980525016785, "learning_rate": 1.992272912366356e-05, "loss": 0.7823, "step": 1290 }, { "epoch": 0.08842785477425388, "grad_norm": 0.24787351489067078, "learning_rate": 1.9919913019089085e-05, "loss": 0.7839, "step": 1300 }, { "epoch": 0.08910806904174814, "grad_norm": 0.26264896988868713, "learning_rate": 1.991704671918031e-05, "loss": 0.7877, "step": 1310 }, { "epoch": 0.0897882833092424, "grad_norm": 0.26188844442367554, "learning_rate": 1.9914130238440874e-05, "loss": 0.7917, "step": 1320 }, { "epoch": 0.09046849757673667, "grad_norm": 0.2632116675376892, "learning_rate": 1.991116359162834e-05, "loss": 0.7889, "step": 1330 }, { "epoch": 0.09114871184423093, "grad_norm": 0.2512179911136627, "learning_rate": 1.9908146793754117e-05, "loss": 0.7885, "step": 1340 }, { "epoch": 0.09182892611172519, "grad_norm": 0.26141035556793213, "learning_rate": 1.9905079860083373e-05, "loss": 0.7858, "step": 1350 }, { "epoch": 0.09250914037921945, "grad_norm": 0.26588204503059387, "learning_rate": 1.9901962806134967e-05, "loss": 0.7848, "step": 1360 }, { "epoch": 0.09318935464671371, "grad_norm": 0.2591280937194824, "learning_rate": 1.9898795647681378e-05, "loss": 0.7953, "step": 1370 }, { "epoch": 0.09386956891420797, "grad_norm": 0.2677696645259857, "learning_rate": 1.989557840074861e-05, "loss": 0.7865, "step": 1380 }, { "epoch": 0.09454978318170223, "grad_norm": 0.2503071129322052, "learning_rate": 1.9892311081616116e-05, "loss": 0.7841, "step": 1390 }, { "epoch": 0.0952299974491965, "grad_norm": 0.26428544521331787, "learning_rate": 1.9888993706816722e-05, "loss": 0.7806, "step": 1400 }, { "epoch": 0.09591021171669076, "grad_norm": 0.24584265053272247, "learning_rate": 1.9885626293136537e-05, "loss": 0.7981, "step": 1410 }, { "epoch": 0.09659042598418502, "grad_norm": 0.27497342228889465, "learning_rate": 1.988220885761487e-05, "loss": 0.782, "step": 1420 }, { "epoch": 0.09727064025167928, "grad_norm": 0.2711544632911682, "learning_rate": 1.9878741417544138e-05, "loss": 0.7778, "step": 1430 }, { "epoch": 0.09795085451917354, "grad_norm": 0.2602938711643219, "learning_rate": 1.9875223990469797e-05, "loss": 0.7902, "step": 1440 }, { "epoch": 0.0986310687866678, "grad_norm": 0.2596087157726288, "learning_rate": 1.9871656594190222e-05, "loss": 0.7893, "step": 1450 }, { "epoch": 0.09931128305416206, "grad_norm": 0.2592509984970093, "learning_rate": 1.9868039246756652e-05, "loss": 0.7861, "step": 1460 }, { "epoch": 0.09999149732165633, "grad_norm": 0.2511366307735443, "learning_rate": 1.986437196647307e-05, "loss": 0.7953, "step": 1470 }, { "epoch": 0.10067171158915059, "grad_norm": 0.27958574891090393, "learning_rate": 1.986065477189613e-05, "loss": 0.7864, "step": 1480 }, { "epoch": 0.10135192585664485, "grad_norm": 0.252519428730011, "learning_rate": 1.9856887681835045e-05, "loss": 0.7883, "step": 1490 }, { "epoch": 0.10203214012413911, "grad_norm": 0.26588451862335205, "learning_rate": 1.9853070715351517e-05, "loss": 0.8007, "step": 1500 }, { "epoch": 0.10271235439163337, "grad_norm": 0.2513459026813507, "learning_rate": 1.9849203891759615e-05, "loss": 0.8079, "step": 1510 }, { "epoch": 0.10339256865912763, "grad_norm": 0.25905874371528625, "learning_rate": 1.984528723062569e-05, "loss": 0.7838, "step": 1520 }, { "epoch": 0.10407278292662188, "grad_norm": 0.2662612497806549, "learning_rate": 1.9841320751768277e-05, "loss": 0.7886, "step": 1530 }, { "epoch": 0.10475299719411614, "grad_norm": 0.2671913504600525, "learning_rate": 1.983730447525798e-05, "loss": 0.7926, "step": 1540 }, { "epoch": 0.1054332114616104, "grad_norm": 0.2774171233177185, "learning_rate": 1.98332384214174e-05, "loss": 0.7952, "step": 1550 }, { "epoch": 0.10611342572910466, "grad_norm": 0.25690150260925293, "learning_rate": 1.9829122610820998e-05, "loss": 0.7731, "step": 1560 }, { "epoch": 0.10679363999659892, "grad_norm": 0.26239097118377686, "learning_rate": 1.982495706429502e-05, "loss": 0.7777, "step": 1570 }, { "epoch": 0.10747385426409319, "grad_norm": 0.2668183147907257, "learning_rate": 1.9820741802917368e-05, "loss": 0.7892, "step": 1580 }, { "epoch": 0.10815406853158745, "grad_norm": 0.2633265256881714, "learning_rate": 1.981647684801751e-05, "loss": 0.8007, "step": 1590 }, { "epoch": 0.10883428279908171, "grad_norm": 0.26835504174232483, "learning_rate": 1.9812162221176368e-05, "loss": 0.7751, "step": 1600 }, { "epoch": 0.10951449706657597, "grad_norm": 0.26324793696403503, "learning_rate": 1.98077979442262e-05, "loss": 0.7892, "step": 1610 }, { "epoch": 0.11019471133407023, "grad_norm": 0.2817467451095581, "learning_rate": 1.9803384039250498e-05, "loss": 0.8094, "step": 1620 }, { "epoch": 0.11087492560156449, "grad_norm": 0.26144781708717346, "learning_rate": 1.979892052858388e-05, "loss": 0.7952, "step": 1630 }, { "epoch": 0.11155513986905875, "grad_norm": 0.25879013538360596, "learning_rate": 1.979440743481197e-05, "loss": 0.7944, "step": 1640 }, { "epoch": 0.11223535413655301, "grad_norm": 0.2659333348274231, "learning_rate": 1.9789844780771283e-05, "loss": 0.7818, "step": 1650 }, { "epoch": 0.11291556840404728, "grad_norm": 0.2678092420101166, "learning_rate": 1.978523258954911e-05, "loss": 0.7914, "step": 1660 }, { "epoch": 0.11359578267154154, "grad_norm": 0.26243963837623596, "learning_rate": 1.9780570884483404e-05, "loss": 0.7984, "step": 1670 }, { "epoch": 0.1142759969390358, "grad_norm": 0.2623418867588043, "learning_rate": 1.977585968916267e-05, "loss": 0.7931, "step": 1680 }, { "epoch": 0.11495621120653006, "grad_norm": 0.2606011629104614, "learning_rate": 1.9771099027425827e-05, "loss": 0.7915, "step": 1690 }, { "epoch": 0.11563642547402432, "grad_norm": 0.2516583800315857, "learning_rate": 1.9766288923362093e-05, "loss": 0.7943, "step": 1700 }, { "epoch": 0.11631663974151858, "grad_norm": 0.24944451451301575, "learning_rate": 1.9761429401310878e-05, "loss": 0.7905, "step": 1710 }, { "epoch": 0.11699685400901284, "grad_norm": 0.25479376316070557, "learning_rate": 1.9756520485861644e-05, "loss": 0.7965, "step": 1720 }, { "epoch": 0.1176770682765071, "grad_norm": 0.2506806552410126, "learning_rate": 1.9751562201853788e-05, "loss": 0.7835, "step": 1730 }, { "epoch": 0.11835728254400137, "grad_norm": 0.25013062357902527, "learning_rate": 1.9746554574376513e-05, "loss": 0.7787, "step": 1740 }, { "epoch": 0.11903749681149563, "grad_norm": 0.2511868178844452, "learning_rate": 1.9741497628768704e-05, "loss": 0.7874, "step": 1750 }, { "epoch": 0.11971771107898989, "grad_norm": 0.24284197390079498, "learning_rate": 1.97363913906188e-05, "loss": 0.7893, "step": 1760 }, { "epoch": 0.12039792534648414, "grad_norm": 0.25849926471710205, "learning_rate": 1.973123588576466e-05, "loss": 0.7953, "step": 1770 }, { "epoch": 0.1210781396139784, "grad_norm": 0.24535062909126282, "learning_rate": 1.9726031140293442e-05, "loss": 0.7783, "step": 1780 }, { "epoch": 0.12175835388147266, "grad_norm": 0.2653627097606659, "learning_rate": 1.9720777180541454e-05, "loss": 0.8002, "step": 1790 }, { "epoch": 0.12243856814896692, "grad_norm": 0.2720264196395874, "learning_rate": 1.9715474033094035e-05, "loss": 0.793, "step": 1800 }, { "epoch": 0.12311878241646118, "grad_norm": 0.2735433280467987, "learning_rate": 1.9710121724785422e-05, "loss": 0.7875, "step": 1810 }, { "epoch": 0.12379899668395544, "grad_norm": 0.2591589391231537, "learning_rate": 1.9704720282698607e-05, "loss": 0.7735, "step": 1820 }, { "epoch": 0.1244792109514497, "grad_norm": 0.2804940938949585, "learning_rate": 1.9699269734165195e-05, "loss": 0.7799, "step": 1830 }, { "epoch": 0.12515942521894396, "grad_norm": 0.2527720630168915, "learning_rate": 1.969377010676528e-05, "loss": 0.7986, "step": 1840 }, { "epoch": 0.12583963948643823, "grad_norm": 0.26848408579826355, "learning_rate": 1.9688221428327295e-05, "loss": 0.7955, "step": 1850 }, { "epoch": 0.1265198537539325, "grad_norm": 0.25075897574424744, "learning_rate": 1.9682623726927873e-05, "loss": 0.7967, "step": 1860 }, { "epoch": 0.12720006802142675, "grad_norm": 0.24642987549304962, "learning_rate": 1.9676977030891707e-05, "loss": 0.7908, "step": 1870 }, { "epoch": 0.127880282288921, "grad_norm": 0.2501721680164337, "learning_rate": 1.9671281368791407e-05, "loss": 0.7788, "step": 1880 }, { "epoch": 0.12856049655641527, "grad_norm": 0.2666166424751282, "learning_rate": 1.9665536769447357e-05, "loss": 0.7993, "step": 1890 }, { "epoch": 0.12924071082390953, "grad_norm": 0.26103368401527405, "learning_rate": 1.9659743261927553e-05, "loss": 0.8011, "step": 1900 }, { "epoch": 0.1299209250914038, "grad_norm": 0.2501685917377472, "learning_rate": 1.9653900875547486e-05, "loss": 0.7938, "step": 1910 }, { "epoch": 0.13060113935889806, "grad_norm": 0.2564981281757355, "learning_rate": 1.9648009639869966e-05, "loss": 0.7986, "step": 1920 }, { "epoch": 0.13128135362639232, "grad_norm": 0.24962271749973297, "learning_rate": 1.9642069584704985e-05, "loss": 0.7944, "step": 1930 }, { "epoch": 0.13196156789388658, "grad_norm": 0.26453691720962524, "learning_rate": 1.9636080740109575e-05, "loss": 0.8025, "step": 1940 }, { "epoch": 0.13264178216138084, "grad_norm": 0.2584055960178375, "learning_rate": 1.9630043136387623e-05, "loss": 0.7899, "step": 1950 }, { "epoch": 0.1333219964288751, "grad_norm": 0.24752554297447205, "learning_rate": 1.9623956804089766e-05, "loss": 0.7723, "step": 1960 }, { "epoch": 0.13400221069636936, "grad_norm": 0.2683132290840149, "learning_rate": 1.961782177401319e-05, "loss": 0.7847, "step": 1970 }, { "epoch": 0.13468242496386362, "grad_norm": 0.26384273171424866, "learning_rate": 1.9611638077201508e-05, "loss": 0.807, "step": 1980 }, { "epoch": 0.13536263923135788, "grad_norm": 0.2578035891056061, "learning_rate": 1.960540574494458e-05, "loss": 0.7938, "step": 1990 }, { "epoch": 0.13604285349885215, "grad_norm": 0.26255810260772705, "learning_rate": 1.9599124808778363e-05, "loss": 0.7994, "step": 2000 }, { "epoch": 0.1367230677663464, "grad_norm": 0.2509863078594208, "learning_rate": 1.959279530048477e-05, "loss": 0.7875, "step": 2010 }, { "epoch": 0.13740328203384067, "grad_norm": 52.004241943359375, "learning_rate": 1.958641725209147e-05, "loss": 0.7879, "step": 2020 }, { "epoch": 0.13808349630133493, "grad_norm": 0.2877405285835266, "learning_rate": 1.9579990695871768e-05, "loss": 0.7983, "step": 2030 }, { "epoch": 0.1387637105688292, "grad_norm": 0.25437185168266296, "learning_rate": 1.9573515664344396e-05, "loss": 0.7964, "step": 2040 }, { "epoch": 0.13944392483632345, "grad_norm": 0.24121397733688354, "learning_rate": 1.9566992190273394e-05, "loss": 0.7774, "step": 2050 }, { "epoch": 0.1401241391038177, "grad_norm": 0.2614583969116211, "learning_rate": 1.9560420306667924e-05, "loss": 0.7907, "step": 2060 }, { "epoch": 0.14080435337131197, "grad_norm": 0.25397762656211853, "learning_rate": 1.9553800046782093e-05, "loss": 0.7704, "step": 2070 }, { "epoch": 0.14148456763880624, "grad_norm": 0.2523746192455292, "learning_rate": 1.95471314441148e-05, "loss": 0.78, "step": 2080 }, { "epoch": 0.1421647819063005, "grad_norm": 0.2488732635974884, "learning_rate": 1.954041453240956e-05, "loss": 0.7877, "step": 2090 }, { "epoch": 0.14284499617379476, "grad_norm": 0.24965067207813263, "learning_rate": 1.9533649345654338e-05, "loss": 0.7849, "step": 2100 }, { "epoch": 0.14352521044128902, "grad_norm": 0.25302186608314514, "learning_rate": 1.9526835918081363e-05, "loss": 0.7885, "step": 2110 }, { "epoch": 0.14420542470878325, "grad_norm": 0.260049968957901, "learning_rate": 1.9519974284166982e-05, "loss": 0.7899, "step": 2120 }, { "epoch": 0.14488563897627751, "grad_norm": 0.26019856333732605, "learning_rate": 1.951306447863146e-05, "loss": 0.7943, "step": 2130 }, { "epoch": 0.14556585324377178, "grad_norm": 0.25930050015449524, "learning_rate": 1.9506106536438806e-05, "loss": 0.7877, "step": 2140 }, { "epoch": 0.14624606751126604, "grad_norm": 0.2671353816986084, "learning_rate": 1.9499100492796614e-05, "loss": 0.8049, "step": 2150 }, { "epoch": 0.1469262817787603, "grad_norm": 0.25898241996765137, "learning_rate": 1.949204638315587e-05, "loss": 0.7876, "step": 2160 }, { "epoch": 0.14760649604625456, "grad_norm": 0.24996714293956757, "learning_rate": 1.948494424321078e-05, "loss": 0.7861, "step": 2170 }, { "epoch": 0.14828671031374882, "grad_norm": 0.25589415431022644, "learning_rate": 1.947779410889858e-05, "loss": 0.79, "step": 2180 }, { "epoch": 0.14896692458124308, "grad_norm": 0.26438066363334656, "learning_rate": 1.9470596016399366e-05, "loss": 0.7955, "step": 2190 }, { "epoch": 0.14964713884873734, "grad_norm": 0.24316558241844177, "learning_rate": 1.9463350002135903e-05, "loss": 0.7886, "step": 2200 }, { "epoch": 0.1503273531162316, "grad_norm": 0.25717779994010925, "learning_rate": 1.945605610277344e-05, "loss": 0.7765, "step": 2210 }, { "epoch": 0.15100756738372587, "grad_norm": 0.2595769464969635, "learning_rate": 1.944871435521953e-05, "loss": 0.7874, "step": 2220 }, { "epoch": 0.15168778165122013, "grad_norm": 0.2691980302333832, "learning_rate": 1.9441324796623843e-05, "loss": 0.8013, "step": 2230 }, { "epoch": 0.1523679959187144, "grad_norm": 0.2670581638813019, "learning_rate": 1.9433887464377975e-05, "loss": 0.7922, "step": 2240 }, { "epoch": 0.15304821018620865, "grad_norm": 0.25826627016067505, "learning_rate": 1.9426402396115256e-05, "loss": 0.7823, "step": 2250 }, { "epoch": 0.1537284244537029, "grad_norm": 0.24645116925239563, "learning_rate": 1.941886962971056e-05, "loss": 0.7953, "step": 2260 }, { "epoch": 0.15440863872119717, "grad_norm": 0.26420411467552185, "learning_rate": 1.941128920328013e-05, "loss": 0.8027, "step": 2270 }, { "epoch": 0.15508885298869143, "grad_norm": 0.2580704987049103, "learning_rate": 1.940366115518136e-05, "loss": 0.7892, "step": 2280 }, { "epoch": 0.1557690672561857, "grad_norm": 0.2476886659860611, "learning_rate": 1.939598552401261e-05, "loss": 0.7859, "step": 2290 }, { "epoch": 0.15644928152367996, "grad_norm": 0.2571199834346771, "learning_rate": 1.9388262348613023e-05, "loss": 0.8004, "step": 2300 }, { "epoch": 0.15712949579117422, "grad_norm": 0.2546902000904083, "learning_rate": 1.9380491668062313e-05, "loss": 0.7731, "step": 2310 }, { "epoch": 0.15780971005866848, "grad_norm": 0.26125824451446533, "learning_rate": 1.937267352168057e-05, "loss": 0.7815, "step": 2320 }, { "epoch": 0.15848992432616274, "grad_norm": 0.2520924508571625, "learning_rate": 1.936480794902807e-05, "loss": 0.7699, "step": 2330 }, { "epoch": 0.159170138593657, "grad_norm": 0.2505636513233185, "learning_rate": 1.9356894989905072e-05, "loss": 0.8013, "step": 2340 }, { "epoch": 0.15985035286115126, "grad_norm": 0.2629729211330414, "learning_rate": 1.93489346843516e-05, "loss": 0.7784, "step": 2350 }, { "epoch": 0.16053056712864552, "grad_norm": 0.24999473989009857, "learning_rate": 1.934092707264726e-05, "loss": 0.7933, "step": 2360 }, { "epoch": 0.16121078139613979, "grad_norm": 1.5976718664169312, "learning_rate": 1.9332872195311038e-05, "loss": 0.8006, "step": 2370 }, { "epoch": 0.16189099566363405, "grad_norm": 0.2572176456451416, "learning_rate": 1.932477009310107e-05, "loss": 0.7872, "step": 2380 }, { "epoch": 0.1625712099311283, "grad_norm": 0.25545644760131836, "learning_rate": 1.931662080701447e-05, "loss": 0.7924, "step": 2390 }, { "epoch": 0.16325142419862257, "grad_norm": 0.2590372562408447, "learning_rate": 1.9308424378287098e-05, "loss": 0.784, "step": 2400 }, { "epoch": 0.16393163846611683, "grad_norm": 0.2718978822231293, "learning_rate": 1.9300180848393354e-05, "loss": 0.791, "step": 2410 }, { "epoch": 0.1646118527336111, "grad_norm": 0.26363012194633484, "learning_rate": 1.929189025904598e-05, "loss": 0.7921, "step": 2420 }, { "epoch": 0.16529206700110535, "grad_norm": 0.25173771381378174, "learning_rate": 1.928355265219583e-05, "loss": 0.7952, "step": 2430 }, { "epoch": 0.16597228126859961, "grad_norm": 0.26242753863334656, "learning_rate": 1.9275168070031692e-05, "loss": 0.7835, "step": 2440 }, { "epoch": 0.16665249553609388, "grad_norm": 0.26284611225128174, "learning_rate": 1.9266736554980024e-05, "loss": 0.778, "step": 2450 }, { "epoch": 0.16733270980358814, "grad_norm": 0.25407007336616516, "learning_rate": 1.9258258149704786e-05, "loss": 0.7824, "step": 2460 }, { "epoch": 0.1680129240710824, "grad_norm": 0.25445783138275146, "learning_rate": 1.9249732897107198e-05, "loss": 0.7996, "step": 2470 }, { "epoch": 0.16869313833857666, "grad_norm": 0.2684445083141327, "learning_rate": 1.9241160840325536e-05, "loss": 0.7902, "step": 2480 }, { "epoch": 0.16937335260607092, "grad_norm": 0.251264750957489, "learning_rate": 1.9232542022734897e-05, "loss": 0.7858, "step": 2490 }, { "epoch": 0.17005356687356518, "grad_norm": 0.25886887311935425, "learning_rate": 1.9223876487947002e-05, "loss": 0.7834, "step": 2500 }, { "epoch": 0.17073378114105944, "grad_norm": 0.24720726907253265, "learning_rate": 1.9215164279809958e-05, "loss": 0.7942, "step": 2510 }, { "epoch": 0.1714139954085537, "grad_norm": 0.2537200152873993, "learning_rate": 1.920640544240804e-05, "loss": 0.7936, "step": 2520 }, { "epoch": 0.17209420967604797, "grad_norm": 0.24554233253002167, "learning_rate": 1.9197600020061475e-05, "loss": 0.7779, "step": 2530 }, { "epoch": 0.17277442394354223, "grad_norm": 0.27647775411605835, "learning_rate": 1.9188748057326213e-05, "loss": 0.7887, "step": 2540 }, { "epoch": 0.1734546382110365, "grad_norm": 0.25730079412460327, "learning_rate": 1.917984959899369e-05, "loss": 0.789, "step": 2550 }, { "epoch": 0.17413485247853075, "grad_norm": 0.2522630989551544, "learning_rate": 1.9170904690090628e-05, "loss": 0.7823, "step": 2560 }, { "epoch": 0.174815066746025, "grad_norm": 0.2553759217262268, "learning_rate": 1.9161913375878777e-05, "loss": 0.7832, "step": 2570 }, { "epoch": 0.17549528101351927, "grad_norm": 0.25228244066238403, "learning_rate": 1.9152875701854715e-05, "loss": 0.7892, "step": 2580 }, { "epoch": 0.1761754952810135, "grad_norm": 0.25936052203178406, "learning_rate": 1.9143791713749592e-05, "loss": 0.7795, "step": 2590 }, { "epoch": 0.17685570954850777, "grad_norm": 0.2502341866493225, "learning_rate": 1.9134661457528914e-05, "loss": 0.7944, "step": 2600 }, { "epoch": 0.17753592381600203, "grad_norm": 0.24452045559883118, "learning_rate": 1.9125484979392306e-05, "loss": 0.7805, "step": 2610 }, { "epoch": 0.1782161380834963, "grad_norm": 0.25821682810783386, "learning_rate": 1.911626232577328e-05, "loss": 0.7858, "step": 2620 }, { "epoch": 0.17889635235099055, "grad_norm": 0.2633768320083618, "learning_rate": 1.9106993543338997e-05, "loss": 0.792, "step": 2630 }, { "epoch": 0.1795765666184848, "grad_norm": 0.2523728609085083, "learning_rate": 1.909767867899003e-05, "loss": 0.789, "step": 2640 }, { "epoch": 0.18025678088597907, "grad_norm": 0.2643498182296753, "learning_rate": 1.9088317779860136e-05, "loss": 0.7941, "step": 2650 }, { "epoch": 0.18093699515347333, "grad_norm": 0.2574792504310608, "learning_rate": 1.9078910893316005e-05, "loss": 0.7933, "step": 2660 }, { "epoch": 0.1816172094209676, "grad_norm": 0.24570371210575104, "learning_rate": 1.906945806695703e-05, "loss": 0.7811, "step": 2670 }, { "epoch": 0.18229742368846186, "grad_norm": 0.26268598437309265, "learning_rate": 1.9059959348615053e-05, "loss": 0.7853, "step": 2680 }, { "epoch": 0.18297763795595612, "grad_norm": 0.2532947063446045, "learning_rate": 1.9050414786354144e-05, "loss": 0.7887, "step": 2690 }, { "epoch": 0.18365785222345038, "grad_norm": 0.26706111431121826, "learning_rate": 1.904082442847034e-05, "loss": 0.8093, "step": 2700 }, { "epoch": 0.18433806649094464, "grad_norm": 0.2524626851081848, "learning_rate": 1.903118832349141e-05, "loss": 0.784, "step": 2710 }, { "epoch": 0.1850182807584389, "grad_norm": 0.2823905348777771, "learning_rate": 1.9021506520176598e-05, "loss": 0.7824, "step": 2720 }, { "epoch": 0.18569849502593316, "grad_norm": 0.25040870904922485, "learning_rate": 1.9011779067516392e-05, "loss": 0.7828, "step": 2730 }, { "epoch": 0.18637870929342742, "grad_norm": 0.255565345287323, "learning_rate": 1.9002006014732266e-05, "loss": 0.7721, "step": 2740 }, { "epoch": 0.18705892356092169, "grad_norm": 0.24715404212474823, "learning_rate": 1.8992187411276433e-05, "loss": 0.7908, "step": 2750 }, { "epoch": 0.18773913782841595, "grad_norm": 0.24670803546905518, "learning_rate": 1.8982323306831592e-05, "loss": 0.7905, "step": 2760 }, { "epoch": 0.1884193520959102, "grad_norm": 0.25133422017097473, "learning_rate": 1.897241375131069e-05, "loss": 0.7907, "step": 2770 }, { "epoch": 0.18909956636340447, "grad_norm": 0.24613183736801147, "learning_rate": 1.8962458794856645e-05, "loss": 0.7739, "step": 2780 }, { "epoch": 0.18977978063089873, "grad_norm": 0.24352288246154785, "learning_rate": 1.8952458487842114e-05, "loss": 0.7834, "step": 2790 }, { "epoch": 0.190459994898393, "grad_norm": 0.2572003901004791, "learning_rate": 1.894241288086923e-05, "loss": 0.7865, "step": 2800 }, { "epoch": 0.19114020916588725, "grad_norm": 0.24495874345302582, "learning_rate": 1.893232202476935e-05, "loss": 0.7822, "step": 2810 }, { "epoch": 0.19182042343338152, "grad_norm": 0.24594272673130035, "learning_rate": 1.8922185970602785e-05, "loss": 0.785, "step": 2820 }, { "epoch": 0.19250063770087578, "grad_norm": 0.24420033395290375, "learning_rate": 1.891200476965856e-05, "loss": 0.7849, "step": 2830 }, { "epoch": 0.19318085196837004, "grad_norm": 0.25174930691719055, "learning_rate": 1.8901778473454134e-05, "loss": 0.7726, "step": 2840 }, { "epoch": 0.1938610662358643, "grad_norm": 0.25425928831100464, "learning_rate": 1.889150713373517e-05, "loss": 0.7866, "step": 2850 }, { "epoch": 0.19454128050335856, "grad_norm": 0.2496025264263153, "learning_rate": 1.8881190802475227e-05, "loss": 0.7917, "step": 2860 }, { "epoch": 0.19522149477085282, "grad_norm": 0.25157952308654785, "learning_rate": 1.887082953187555e-05, "loss": 0.7875, "step": 2870 }, { "epoch": 0.19590170903834708, "grad_norm": 0.25684642791748047, "learning_rate": 1.8860423374364767e-05, "loss": 0.7775, "step": 2880 }, { "epoch": 0.19658192330584134, "grad_norm": 0.37965211272239685, "learning_rate": 1.8849972382598634e-05, "loss": 0.7809, "step": 2890 }, { "epoch": 0.1972621375733356, "grad_norm": 0.28048843145370483, "learning_rate": 1.8839476609459778e-05, "loss": 0.7934, "step": 2900 }, { "epoch": 0.19794235184082987, "grad_norm": 0.2760063111782074, "learning_rate": 1.8828936108057413e-05, "loss": 0.7938, "step": 2910 }, { "epoch": 0.19862256610832413, "grad_norm": 0.2563750147819519, "learning_rate": 1.8818350931727093e-05, "loss": 0.7887, "step": 2920 }, { "epoch": 0.1993027803758184, "grad_norm": 0.24341252446174622, "learning_rate": 1.8807721134030418e-05, "loss": 0.7989, "step": 2930 }, { "epoch": 0.19998299464331265, "grad_norm": 0.25352743268013, "learning_rate": 1.8797046768754783e-05, "loss": 0.762, "step": 2940 }, { "epoch": 0.2006632089108069, "grad_norm": 0.26104164123535156, "learning_rate": 1.8786327889913093e-05, "loss": 0.7762, "step": 2950 }, { "epoch": 0.20134342317830117, "grad_norm": 0.24420127272605896, "learning_rate": 1.8775564551743498e-05, "loss": 0.7812, "step": 2960 }, { "epoch": 0.20202363744579543, "grad_norm": 0.24496297538280487, "learning_rate": 1.876475680870911e-05, "loss": 0.7849, "step": 2970 }, { "epoch": 0.2027038517132897, "grad_norm": 0.24318674206733704, "learning_rate": 1.8753904715497734e-05, "loss": 0.8012, "step": 2980 }, { "epoch": 0.20338406598078396, "grad_norm": 0.25721314549446106, "learning_rate": 1.8743008327021594e-05, "loss": 0.788, "step": 2990 }, { "epoch": 0.20406428024827822, "grad_norm": 0.2569056749343872, "learning_rate": 1.8732067698417042e-05, "loss": 0.7852, "step": 3000 }, { "epoch": 0.20474449451577248, "grad_norm": 0.2654126286506653, "learning_rate": 1.8721082885044296e-05, "loss": 0.7908, "step": 3010 }, { "epoch": 0.20542470878326674, "grad_norm": 0.2526949942111969, "learning_rate": 1.8710053942487144e-05, "loss": 0.7924, "step": 3020 }, { "epoch": 0.206104923050761, "grad_norm": 0.2519407272338867, "learning_rate": 1.8698980926552675e-05, "loss": 0.781, "step": 3030 }, { "epoch": 0.20678513731825526, "grad_norm": 0.23384132981300354, "learning_rate": 1.8687863893270995e-05, "loss": 0.7733, "step": 3040 }, { "epoch": 0.20746535158574952, "grad_norm": 0.25337284803390503, "learning_rate": 1.867670289889493e-05, "loss": 0.7963, "step": 3050 }, { "epoch": 0.20814556585324376, "grad_norm": 0.2520735263824463, "learning_rate": 1.8665497999899763e-05, "loss": 0.7783, "step": 3060 }, { "epoch": 0.20882578012073802, "grad_norm": 0.25193944573402405, "learning_rate": 1.865424925298293e-05, "loss": 0.7874, "step": 3070 }, { "epoch": 0.20950599438823228, "grad_norm": 0.26063066720962524, "learning_rate": 1.864295671506374e-05, "loss": 0.7863, "step": 3080 }, { "epoch": 0.21018620865572654, "grad_norm": 0.2780202627182007, "learning_rate": 1.8631620443283093e-05, "loss": 0.7907, "step": 3090 }, { "epoch": 0.2108664229232208, "grad_norm": 0.25003233551979065, "learning_rate": 1.8620240495003172e-05, "loss": 0.7834, "step": 3100 }, { "epoch": 0.21154663719071506, "grad_norm": 0.2419746369123459, "learning_rate": 1.8608816927807182e-05, "loss": 0.7776, "step": 3110 }, { "epoch": 0.21222685145820933, "grad_norm": 0.2449108064174652, "learning_rate": 1.8597349799499037e-05, "loss": 0.779, "step": 3120 }, { "epoch": 0.2129070657257036, "grad_norm": 0.2526479959487915, "learning_rate": 1.858583916810306e-05, "loss": 0.7939, "step": 3130 }, { "epoch": 0.21358727999319785, "grad_norm": 0.2487485408782959, "learning_rate": 1.857428509186372e-05, "loss": 0.793, "step": 3140 }, { "epoch": 0.2142674942606921, "grad_norm": 0.25782719254493713, "learning_rate": 1.8562687629245312e-05, "loss": 0.7807, "step": 3150 }, { "epoch": 0.21494770852818637, "grad_norm": 0.24029205739498138, "learning_rate": 1.8551046838931665e-05, "loss": 0.7909, "step": 3160 }, { "epoch": 0.21562792279568063, "grad_norm": 0.24059611558914185, "learning_rate": 1.853936277982585e-05, "loss": 0.7798, "step": 3170 }, { "epoch": 0.2163081370631749, "grad_norm": 0.2464148998260498, "learning_rate": 1.8527635511049893e-05, "loss": 0.7797, "step": 3180 }, { "epoch": 0.21698835133066915, "grad_norm": 0.2505282759666443, "learning_rate": 1.851586509194444e-05, "loss": 0.7908, "step": 3190 }, { "epoch": 0.21766856559816342, "grad_norm": 0.47646915912628174, "learning_rate": 1.8504051582068502e-05, "loss": 0.7863, "step": 3200 }, { "epoch": 0.21834877986565768, "grad_norm": 0.2539674937725067, "learning_rate": 1.8492195041199122e-05, "loss": 0.7858, "step": 3210 }, { "epoch": 0.21902899413315194, "grad_norm": 0.2528616487979889, "learning_rate": 1.848029552933108e-05, "loss": 0.7894, "step": 3220 }, { "epoch": 0.2197092084006462, "grad_norm": 0.2563914358615875, "learning_rate": 1.84683531066766e-05, "loss": 0.7789, "step": 3230 }, { "epoch": 0.22038942266814046, "grad_norm": 0.25774049758911133, "learning_rate": 1.8456367833665024e-05, "loss": 0.7702, "step": 3240 }, { "epoch": 0.22106963693563472, "grad_norm": 0.25088292360305786, "learning_rate": 1.8444339770942537e-05, "loss": 0.7835, "step": 3250 }, { "epoch": 0.22174985120312898, "grad_norm": 0.24714317917823792, "learning_rate": 1.8432268979371825e-05, "loss": 0.7669, "step": 3260 }, { "epoch": 0.22243006547062324, "grad_norm": 0.25273606181144714, "learning_rate": 1.8420155520031803e-05, "loss": 0.7942, "step": 3270 }, { "epoch": 0.2231102797381175, "grad_norm": 0.25561535358428955, "learning_rate": 1.840799945421726e-05, "loss": 0.7879, "step": 3280 }, { "epoch": 0.22379049400561177, "grad_norm": 0.24375256896018982, "learning_rate": 1.8395800843438608e-05, "loss": 0.797, "step": 3290 }, { "epoch": 0.22447070827310603, "grad_norm": 0.25886252522468567, "learning_rate": 1.8383559749421517e-05, "loss": 0.7828, "step": 3300 }, { "epoch": 0.2251509225406003, "grad_norm": 0.2579536437988281, "learning_rate": 1.8371276234106634e-05, "loss": 0.7938, "step": 3310 }, { "epoch": 0.22583113680809455, "grad_norm": 0.2595989406108856, "learning_rate": 1.8358950359649256e-05, "loss": 0.779, "step": 3320 }, { "epoch": 0.2265113510755888, "grad_norm": 0.2517205476760864, "learning_rate": 1.8346582188419022e-05, "loss": 0.7743, "step": 3330 }, { "epoch": 0.22719156534308307, "grad_norm": 0.2425038069486618, "learning_rate": 1.8334171782999596e-05, "loss": 0.7698, "step": 3340 }, { "epoch": 0.22787177961057734, "grad_norm": 0.2507912516593933, "learning_rate": 1.8321719206188344e-05, "loss": 0.7834, "step": 3350 }, { "epoch": 0.2285519938780716, "grad_norm": 0.25281819701194763, "learning_rate": 1.8309224520996034e-05, "loss": 0.779, "step": 3360 }, { "epoch": 0.22923220814556586, "grad_norm": 0.24461200833320618, "learning_rate": 1.8296687790646487e-05, "loss": 0.7771, "step": 3370 }, { "epoch": 0.22991242241306012, "grad_norm": 0.2611219584941864, "learning_rate": 1.828410907857629e-05, "loss": 0.7872, "step": 3380 }, { "epoch": 0.23059263668055438, "grad_norm": 0.26623839139938354, "learning_rate": 1.8271488448434455e-05, "loss": 0.782, "step": 3390 }, { "epoch": 0.23127285094804864, "grad_norm": 0.2582928240299225, "learning_rate": 1.8258825964082096e-05, "loss": 0.784, "step": 3400 }, { "epoch": 0.2319530652155429, "grad_norm": 0.3065914809703827, "learning_rate": 1.8246121689592123e-05, "loss": 0.792, "step": 3410 }, { "epoch": 0.23263327948303716, "grad_norm": 0.25425055623054504, "learning_rate": 1.8233375689248896e-05, "loss": 0.7827, "step": 3420 }, { "epoch": 0.23331349375053143, "grad_norm": 0.2543756067752838, "learning_rate": 1.8220588027547917e-05, "loss": 0.7936, "step": 3430 }, { "epoch": 0.2339937080180257, "grad_norm": 0.24979901313781738, "learning_rate": 1.82077587691955e-05, "loss": 0.7934, "step": 3440 }, { "epoch": 0.23467392228551995, "grad_norm": 0.24882297217845917, "learning_rate": 1.8194887979108425e-05, "loss": 0.7794, "step": 3450 }, { "epoch": 0.2353541365530142, "grad_norm": 0.2424454540014267, "learning_rate": 1.818197572241364e-05, "loss": 0.7868, "step": 3460 }, { "epoch": 0.23603435082050847, "grad_norm": 0.2578614056110382, "learning_rate": 1.8169022064447913e-05, "loss": 0.7885, "step": 3470 }, { "epoch": 0.23671456508800273, "grad_norm": 0.26300248503685, "learning_rate": 1.81560270707575e-05, "loss": 0.7791, "step": 3480 }, { "epoch": 0.237394779355497, "grad_norm": 0.24765539169311523, "learning_rate": 1.814299080709783e-05, "loss": 0.7945, "step": 3490 }, { "epoch": 0.23807499362299125, "grad_norm": 0.24388448894023895, "learning_rate": 1.812991333943314e-05, "loss": 0.7902, "step": 3500 }, { "epoch": 0.23875520789048552, "grad_norm": 0.2515674829483032, "learning_rate": 1.811679473393618e-05, "loss": 0.7856, "step": 3510 }, { "epoch": 0.23943542215797978, "grad_norm": 0.2512035667896271, "learning_rate": 1.8103635056987855e-05, "loss": 0.7857, "step": 3520 }, { "epoch": 0.240115636425474, "grad_norm": 0.2631925940513611, "learning_rate": 1.8090434375176888e-05, "loss": 0.788, "step": 3530 }, { "epoch": 0.24079585069296827, "grad_norm": 0.27402493357658386, "learning_rate": 1.8077192755299494e-05, "loss": 0.7789, "step": 3540 }, { "epoch": 0.24147606496046253, "grad_norm": 0.2507264018058777, "learning_rate": 1.806391026435904e-05, "loss": 0.7833, "step": 3550 }, { "epoch": 0.2421562792279568, "grad_norm": 0.25710999965667725, "learning_rate": 1.8050586969565697e-05, "loss": 0.7866, "step": 3560 }, { "epoch": 0.24283649349545106, "grad_norm": 0.2526022791862488, "learning_rate": 1.8037222938336107e-05, "loss": 0.7902, "step": 3570 }, { "epoch": 0.24351670776294532, "grad_norm": 0.2536466121673584, "learning_rate": 1.8023818238293046e-05, "loss": 0.7903, "step": 3580 }, { "epoch": 0.24419692203043958, "grad_norm": 0.24735701084136963, "learning_rate": 1.8010372937265063e-05, "loss": 0.7826, "step": 3590 }, { "epoch": 0.24487713629793384, "grad_norm": 0.25265151262283325, "learning_rate": 1.7996887103286173e-05, "loss": 0.7926, "step": 3600 }, { "epoch": 0.2455573505654281, "grad_norm": 0.2552418112754822, "learning_rate": 1.7983360804595467e-05, "loss": 0.7847, "step": 3610 }, { "epoch": 0.24623756483292236, "grad_norm": 0.2783318758010864, "learning_rate": 1.7969794109636806e-05, "loss": 0.7984, "step": 3620 }, { "epoch": 0.24691777910041662, "grad_norm": 0.2513044476509094, "learning_rate": 1.7956187087058447e-05, "loss": 0.7863, "step": 3630 }, { "epoch": 0.24759799336791088, "grad_norm": 0.2544722855091095, "learning_rate": 1.7942539805712715e-05, "loss": 0.7847, "step": 3640 }, { "epoch": 0.24827820763540515, "grad_norm": 0.2505376935005188, "learning_rate": 1.792885233465564e-05, "loss": 0.7801, "step": 3650 }, { "epoch": 0.2489584219028994, "grad_norm": 0.2470908910036087, "learning_rate": 1.7915124743146623e-05, "loss": 0.778, "step": 3660 }, { "epoch": 0.24963863617039367, "grad_norm": 0.28139612078666687, "learning_rate": 1.790135710064807e-05, "loss": 0.7649, "step": 3670 }, { "epoch": 0.25031885043788793, "grad_norm": 0.2537873089313507, "learning_rate": 1.788754947682505e-05, "loss": 0.7771, "step": 3680 }, { "epoch": 0.2509990647053822, "grad_norm": 0.24972447752952576, "learning_rate": 1.7873701941544935e-05, "loss": 0.7949, "step": 3690 }, { "epoch": 0.25167927897287645, "grad_norm": 0.25351646542549133, "learning_rate": 1.7859814564877063e-05, "loss": 0.7751, "step": 3700 }, { "epoch": 0.2523594932403707, "grad_norm": 0.2693154215812683, "learning_rate": 1.7845887417092358e-05, "loss": 0.7709, "step": 3710 }, { "epoch": 0.253039707507865, "grad_norm": 0.3126961588859558, "learning_rate": 1.7831920568663e-05, "loss": 0.7929, "step": 3720 }, { "epoch": 0.25371992177535924, "grad_norm": 0.2440711259841919, "learning_rate": 1.781791409026204e-05, "loss": 0.7842, "step": 3730 }, { "epoch": 0.2544001360428535, "grad_norm": 0.2512514591217041, "learning_rate": 1.780386805276308e-05, "loss": 0.7765, "step": 3740 }, { "epoch": 0.25508035031034776, "grad_norm": 0.24332073330879211, "learning_rate": 1.778978252723988e-05, "loss": 0.7965, "step": 3750 }, { "epoch": 0.255760564577842, "grad_norm": 0.2476550042629242, "learning_rate": 1.7775657584966018e-05, "loss": 0.7736, "step": 3760 }, { "epoch": 0.2564407788453363, "grad_norm": 0.2414562702178955, "learning_rate": 1.776149329741451e-05, "loss": 0.771, "step": 3770 }, { "epoch": 0.25712099311283054, "grad_norm": 0.2415563464164734, "learning_rate": 1.7747289736257482e-05, "loss": 0.7706, "step": 3780 }, { "epoch": 0.2578012073803248, "grad_norm": 0.24708572030067444, "learning_rate": 1.773304697336577e-05, "loss": 0.7917, "step": 3790 }, { "epoch": 0.25848142164781907, "grad_norm": 0.25027525424957275, "learning_rate": 1.771876508080858e-05, "loss": 0.7804, "step": 3800 }, { "epoch": 0.2591616359153133, "grad_norm": 0.2515070140361786, "learning_rate": 1.770444413085312e-05, "loss": 0.7864, "step": 3810 }, { "epoch": 0.2598418501828076, "grad_norm": 0.24446500837802887, "learning_rate": 1.769008419596422e-05, "loss": 0.7862, "step": 3820 }, { "epoch": 0.26052206445030185, "grad_norm": 0.25847122073173523, "learning_rate": 1.767568534880398e-05, "loss": 0.7912, "step": 3830 }, { "epoch": 0.2612022787177961, "grad_norm": 0.25062787532806396, "learning_rate": 1.766124766223141e-05, "loss": 0.7788, "step": 3840 }, { "epoch": 0.26188249298529037, "grad_norm": 0.2614087164402008, "learning_rate": 1.764677120930203e-05, "loss": 0.7853, "step": 3850 }, { "epoch": 0.26256270725278463, "grad_norm": 0.2507074773311615, "learning_rate": 1.763225606326754e-05, "loss": 0.7881, "step": 3860 }, { "epoch": 0.2632429215202789, "grad_norm": 0.3656567931175232, "learning_rate": 1.7617702297575408e-05, "loss": 0.7816, "step": 3870 }, { "epoch": 0.26392313578777316, "grad_norm": 0.24729153513908386, "learning_rate": 1.7603109985868544e-05, "loss": 0.7723, "step": 3880 }, { "epoch": 0.2646033500552674, "grad_norm": 0.2480657994747162, "learning_rate": 1.758847920198488e-05, "loss": 0.7978, "step": 3890 }, { "epoch": 0.2652835643227617, "grad_norm": 0.24121278524398804, "learning_rate": 1.7573810019957033e-05, "loss": 0.7864, "step": 3900 }, { "epoch": 0.26596377859025594, "grad_norm": 0.2533305585384369, "learning_rate": 1.7559102514011913e-05, "loss": 0.7786, "step": 3910 }, { "epoch": 0.2666439928577502, "grad_norm": 0.26402154564857483, "learning_rate": 1.7544356758570355e-05, "loss": 0.7931, "step": 3920 }, { "epoch": 0.26732420712524446, "grad_norm": 0.258201539516449, "learning_rate": 1.752957282824673e-05, "loss": 0.801, "step": 3930 }, { "epoch": 0.2680044213927387, "grad_norm": 0.25085070729255676, "learning_rate": 1.7514750797848587e-05, "loss": 0.7904, "step": 3940 }, { "epoch": 0.268684635660233, "grad_norm": 0.2558985948562622, "learning_rate": 1.7499890742376254e-05, "loss": 0.7947, "step": 3950 }, { "epoch": 0.26936484992772725, "grad_norm": 0.25026050209999084, "learning_rate": 1.7484992737022474e-05, "loss": 0.7767, "step": 3960 }, { "epoch": 0.2700450641952215, "grad_norm": 0.250328004360199, "learning_rate": 1.7470056857172017e-05, "loss": 0.783, "step": 3970 }, { "epoch": 0.27072527846271577, "grad_norm": 0.2586681842803955, "learning_rate": 1.7455083178401295e-05, "loss": 0.7867, "step": 3980 }, { "epoch": 0.27140549273021003, "grad_norm": 0.2514212131500244, "learning_rate": 1.7440071776477996e-05, "loss": 0.7876, "step": 3990 }, { "epoch": 0.2720857069977043, "grad_norm": 0.2571597695350647, "learning_rate": 1.7425022727360684e-05, "loss": 0.7852, "step": 4000 }, { "epoch": 0.27276592126519855, "grad_norm": 0.25561052560806274, "learning_rate": 1.7409936107198416e-05, "loss": 0.7902, "step": 4010 }, { "epoch": 0.2734461355326928, "grad_norm": 0.2517993450164795, "learning_rate": 1.739481199233036e-05, "loss": 0.7811, "step": 4020 }, { "epoch": 0.2741263498001871, "grad_norm": 0.24940787255764008, "learning_rate": 1.7379650459285417e-05, "loss": 0.7839, "step": 4030 }, { "epoch": 0.27480656406768134, "grad_norm": 0.24489739537239075, "learning_rate": 1.736445158478182e-05, "loss": 0.7957, "step": 4040 }, { "epoch": 0.2754867783351756, "grad_norm": 0.2565753757953644, "learning_rate": 1.7349215445726763e-05, "loss": 0.7781, "step": 4050 }, { "epoch": 0.27616699260266986, "grad_norm": 0.2571543753147125, "learning_rate": 1.733394211921598e-05, "loss": 0.7808, "step": 4060 }, { "epoch": 0.2768472068701641, "grad_norm": 0.2582187354564667, "learning_rate": 1.731863168253339e-05, "loss": 0.7962, "step": 4070 }, { "epoch": 0.2775274211376584, "grad_norm": 0.24603897333145142, "learning_rate": 1.7303284213150695e-05, "loss": 0.7772, "step": 4080 }, { "epoch": 0.27820763540515264, "grad_norm": 0.24644562602043152, "learning_rate": 1.7287899788726975e-05, "loss": 0.788, "step": 4090 }, { "epoch": 0.2788878496726469, "grad_norm": 0.2610757052898407, "learning_rate": 1.727247848710831e-05, "loss": 0.79, "step": 4100 }, { "epoch": 0.27956806394014116, "grad_norm": 0.24853172898292542, "learning_rate": 1.7257020386327376e-05, "loss": 0.7931, "step": 4110 }, { "epoch": 0.2802482782076354, "grad_norm": 0.2530014216899872, "learning_rate": 1.7241525564603058e-05, "loss": 0.7923, "step": 4120 }, { "epoch": 0.2809284924751297, "grad_norm": 0.2790886163711548, "learning_rate": 1.722599410034006e-05, "loss": 0.7676, "step": 4130 }, { "epoch": 0.28160870674262395, "grad_norm": 0.2523122727870941, "learning_rate": 1.7210426072128476e-05, "loss": 0.7803, "step": 4140 }, { "epoch": 0.2822889210101182, "grad_norm": 0.2545037865638733, "learning_rate": 1.719482155874344e-05, "loss": 0.7851, "step": 4150 }, { "epoch": 0.28296913527761247, "grad_norm": 0.28469881415367126, "learning_rate": 1.717918063914469e-05, "loss": 0.7883, "step": 4160 }, { "epoch": 0.28364934954510673, "grad_norm": 0.2560071051120758, "learning_rate": 1.716350339247619e-05, "loss": 0.7746, "step": 4170 }, { "epoch": 0.284329563812601, "grad_norm": 0.2442929893732071, "learning_rate": 1.714778989806571e-05, "loss": 0.7922, "step": 4180 }, { "epoch": 0.28500977808009526, "grad_norm": 0.2452080398797989, "learning_rate": 1.7132040235424443e-05, "loss": 0.7871, "step": 4190 }, { "epoch": 0.2856899923475895, "grad_norm": 0.23994505405426025, "learning_rate": 1.7116254484246596e-05, "loss": 0.7627, "step": 4200 }, { "epoch": 0.2863702066150838, "grad_norm": 0.255530446767807, "learning_rate": 1.710043272440898e-05, "loss": 0.7911, "step": 4210 }, { "epoch": 0.28705042088257804, "grad_norm": 0.24917486310005188, "learning_rate": 1.7084575035970624e-05, "loss": 0.793, "step": 4220 }, { "epoch": 0.2877306351500723, "grad_norm": 0.2526813745498657, "learning_rate": 1.7068681499172338e-05, "loss": 0.7778, "step": 4230 }, { "epoch": 0.2884108494175665, "grad_norm": 0.24449491500854492, "learning_rate": 1.705275219443635e-05, "loss": 0.7811, "step": 4240 }, { "epoch": 0.28909106368506077, "grad_norm": 0.26806896924972534, "learning_rate": 1.7036787202365858e-05, "loss": 0.8003, "step": 4250 }, { "epoch": 0.28977127795255503, "grad_norm": 0.2438434511423111, "learning_rate": 1.7020786603744647e-05, "loss": 0.783, "step": 4260 }, { "epoch": 0.2904514922200493, "grad_norm": 0.2513818144798279, "learning_rate": 1.7004750479536675e-05, "loss": 0.7772, "step": 4270 }, { "epoch": 0.29113170648754355, "grad_norm": 0.2550441324710846, "learning_rate": 1.6988678910885654e-05, "loss": 0.7786, "step": 4280 }, { "epoch": 0.2918119207550378, "grad_norm": 0.2485378384590149, "learning_rate": 1.697257197911466e-05, "loss": 0.7736, "step": 4290 }, { "epoch": 0.2924921350225321, "grad_norm": 0.249145969748497, "learning_rate": 1.695642976572569e-05, "loss": 0.7912, "step": 4300 }, { "epoch": 0.29317234929002634, "grad_norm": 0.2479313313961029, "learning_rate": 1.694025235239929e-05, "loss": 0.7746, "step": 4310 }, { "epoch": 0.2938525635575206, "grad_norm": 0.24584801495075226, "learning_rate": 1.6924039820994097e-05, "loss": 0.7835, "step": 4320 }, { "epoch": 0.29453277782501486, "grad_norm": 0.24700278043746948, "learning_rate": 1.6907792253546467e-05, "loss": 0.7745, "step": 4330 }, { "epoch": 0.2952129920925091, "grad_norm": 0.2509085237979889, "learning_rate": 1.689150973227003e-05, "loss": 0.7878, "step": 4340 }, { "epoch": 0.2958932063600034, "grad_norm": 0.24491603672504425, "learning_rate": 1.6875192339555284e-05, "loss": 0.7862, "step": 4350 }, { "epoch": 0.29657342062749764, "grad_norm": 0.2515392303466797, "learning_rate": 1.6858840157969186e-05, "loss": 0.7915, "step": 4360 }, { "epoch": 0.2972536348949919, "grad_norm": 0.25104475021362305, "learning_rate": 1.6842453270254717e-05, "loss": 0.7854, "step": 4370 }, { "epoch": 0.29793384916248616, "grad_norm": 0.27672502398490906, "learning_rate": 1.6826031759330478e-05, "loss": 0.7857, "step": 4380 }, { "epoch": 0.2986140634299804, "grad_norm": 0.24459819495677948, "learning_rate": 1.680957570829027e-05, "loss": 0.7709, "step": 4390 }, { "epoch": 0.2992942776974747, "grad_norm": 0.25333377718925476, "learning_rate": 1.6793085200402658e-05, "loss": 0.7737, "step": 4400 }, { "epoch": 0.29997449196496895, "grad_norm": 0.26826807856559753, "learning_rate": 1.6776560319110568e-05, "loss": 0.7892, "step": 4410 }, { "epoch": 0.3006547062324632, "grad_norm": 0.2829219400882721, "learning_rate": 1.676000114803086e-05, "loss": 0.7924, "step": 4420 }, { "epoch": 0.30133492049995747, "grad_norm": 0.24529558420181274, "learning_rate": 1.6743407770953893e-05, "loss": 0.7827, "step": 4430 }, { "epoch": 0.30201513476745173, "grad_norm": 0.24346935749053955, "learning_rate": 1.6726780271843125e-05, "loss": 0.7948, "step": 4440 }, { "epoch": 0.302695349034946, "grad_norm": 0.257079541683197, "learning_rate": 1.6710118734834655e-05, "loss": 0.7796, "step": 4450 }, { "epoch": 0.30337556330244025, "grad_norm": 0.2608999013900757, "learning_rate": 1.669342324423683e-05, "loss": 0.7788, "step": 4460 }, { "epoch": 0.3040557775699345, "grad_norm": 0.25176891684532166, "learning_rate": 1.6676693884529795e-05, "loss": 0.779, "step": 4470 }, { "epoch": 0.3047359918374288, "grad_norm": 0.2465016096830368, "learning_rate": 1.6659930740365084e-05, "loss": 0.7929, "step": 4480 }, { "epoch": 0.30541620610492304, "grad_norm": 0.254658579826355, "learning_rate": 1.6643133896565174e-05, "loss": 0.7916, "step": 4490 }, { "epoch": 0.3060964203724173, "grad_norm": 0.2789342999458313, "learning_rate": 1.6626303438123067e-05, "loss": 0.774, "step": 4500 }, { "epoch": 0.30677663463991156, "grad_norm": 0.24634107947349548, "learning_rate": 1.6609439450201858e-05, "loss": 0.7752, "step": 4510 }, { "epoch": 0.3074568489074058, "grad_norm": 0.25199273228645325, "learning_rate": 1.6592542018134307e-05, "loss": 0.776, "step": 4520 }, { "epoch": 0.3081370631749001, "grad_norm": 0.25096115469932556, "learning_rate": 1.657561122742239e-05, "loss": 0.7851, "step": 4530 }, { "epoch": 0.30881727744239434, "grad_norm": 0.2556649148464203, "learning_rate": 1.6558647163736896e-05, "loss": 0.7796, "step": 4540 }, { "epoch": 0.3094974917098886, "grad_norm": 0.24998249113559723, "learning_rate": 1.6541649912916967e-05, "loss": 0.7771, "step": 4550 }, { "epoch": 0.31017770597738287, "grad_norm": 0.2417616993188858, "learning_rate": 1.6524619560969673e-05, "loss": 0.7801, "step": 4560 }, { "epoch": 0.31085792024487713, "grad_norm": 0.24307110905647278, "learning_rate": 1.6507556194069584e-05, "loss": 0.7843, "step": 4570 }, { "epoch": 0.3115381345123714, "grad_norm": 0.24328400194644928, "learning_rate": 1.6490459898558326e-05, "loss": 0.7737, "step": 4580 }, { "epoch": 0.31221834877986565, "grad_norm": 0.25381749868392944, "learning_rate": 1.6473330760944143e-05, "loss": 0.7746, "step": 4590 }, { "epoch": 0.3128985630473599, "grad_norm": 0.25124746561050415, "learning_rate": 1.645616886790146e-05, "loss": 0.7819, "step": 4600 }, { "epoch": 0.3135787773148542, "grad_norm": 0.2559330463409424, "learning_rate": 1.6438974306270453e-05, "loss": 0.7774, "step": 4610 }, { "epoch": 0.31425899158234843, "grad_norm": 0.24517956376075745, "learning_rate": 1.64217471630566e-05, "loss": 0.78, "step": 4620 }, { "epoch": 0.3149392058498427, "grad_norm": 0.2602207660675049, "learning_rate": 1.6404487525430248e-05, "loss": 0.7799, "step": 4630 }, { "epoch": 0.31561942011733696, "grad_norm": 0.2795751690864563, "learning_rate": 1.638719548072615e-05, "loss": 0.7832, "step": 4640 }, { "epoch": 0.3162996343848312, "grad_norm": 0.2633031904697418, "learning_rate": 1.6369871116443064e-05, "loss": 0.7741, "step": 4650 }, { "epoch": 0.3169798486523255, "grad_norm": 0.25834837555885315, "learning_rate": 1.635251452024327e-05, "loss": 0.7919, "step": 4660 }, { "epoch": 0.31766006291981974, "grad_norm": 0.2532989978790283, "learning_rate": 1.6335125779952153e-05, "loss": 0.7656, "step": 4670 }, { "epoch": 0.318340277187314, "grad_norm": 0.2523132264614105, "learning_rate": 1.631770498355775e-05, "loss": 0.7708, "step": 4680 }, { "epoch": 0.31902049145480826, "grad_norm": 0.25467121601104736, "learning_rate": 1.630025221921029e-05, "loss": 0.7786, "step": 4690 }, { "epoch": 0.3197007057223025, "grad_norm": 0.25180891156196594, "learning_rate": 1.628276757522178e-05, "loss": 0.7823, "step": 4700 }, { "epoch": 0.3203809199897968, "grad_norm": 0.2540765404701233, "learning_rate": 1.626525114006553e-05, "loss": 0.7902, "step": 4710 }, { "epoch": 0.32106113425729105, "grad_norm": 0.24241061508655548, "learning_rate": 1.624770300237571e-05, "loss": 0.7944, "step": 4720 }, { "epoch": 0.3217413485247853, "grad_norm": 0.2548539340496063, "learning_rate": 1.6230123250946913e-05, "loss": 0.7758, "step": 4730 }, { "epoch": 0.32242156279227957, "grad_norm": 0.24303984642028809, "learning_rate": 1.621251197473371e-05, "loss": 0.7802, "step": 4740 }, { "epoch": 0.32310177705977383, "grad_norm": 0.25173860788345337, "learning_rate": 1.6194869262850165e-05, "loss": 0.7695, "step": 4750 }, { "epoch": 0.3237819913272681, "grad_norm": 0.2511792480945587, "learning_rate": 1.6177195204569432e-05, "loss": 0.7739, "step": 4760 }, { "epoch": 0.32446220559476235, "grad_norm": 0.26270678639411926, "learning_rate": 1.6159489889323266e-05, "loss": 0.7921, "step": 4770 }, { "epoch": 0.3251424198622566, "grad_norm": 0.2512210011482239, "learning_rate": 1.6141753406701582e-05, "loss": 0.7776, "step": 4780 }, { "epoch": 0.3258226341297509, "grad_norm": 0.2432084083557129, "learning_rate": 1.612398584645202e-05, "loss": 0.7793, "step": 4790 }, { "epoch": 0.32650284839724514, "grad_norm": 0.24282826483249664, "learning_rate": 1.6106187298479455e-05, "loss": 0.7697, "step": 4800 }, { "epoch": 0.3271830626647394, "grad_norm": 0.27392762899398804, "learning_rate": 1.608835785284557e-05, "loss": 0.7847, "step": 4810 }, { "epoch": 0.32786327693223366, "grad_norm": 0.24720722436904907, "learning_rate": 1.60704975997684e-05, "loss": 0.7678, "step": 4820 }, { "epoch": 0.3285434911997279, "grad_norm": 0.2537555694580078, "learning_rate": 1.6052606629621856e-05, "loss": 0.7774, "step": 4830 }, { "epoch": 0.3292237054672222, "grad_norm": 0.2609635591506958, "learning_rate": 1.6034685032935277e-05, "loss": 0.7801, "step": 4840 }, { "epoch": 0.32990391973471644, "grad_norm": 0.2454914003610611, "learning_rate": 1.6016732900392987e-05, "loss": 0.7844, "step": 4850 }, { "epoch": 0.3305841340022107, "grad_norm": 0.2437964379787445, "learning_rate": 1.5998750322833814e-05, "loss": 0.7694, "step": 4860 }, { "epoch": 0.33126434826970497, "grad_norm": 0.2476215809583664, "learning_rate": 1.5980737391250643e-05, "loss": 0.7864, "step": 4870 }, { "epoch": 0.33194456253719923, "grad_norm": 0.24153448641300201, "learning_rate": 1.5962694196789947e-05, "loss": 0.7679, "step": 4880 }, { "epoch": 0.3326247768046935, "grad_norm": 0.26852676272392273, "learning_rate": 1.5944620830751342e-05, "loss": 0.7768, "step": 4890 }, { "epoch": 0.33330499107218775, "grad_norm": 0.2522974908351898, "learning_rate": 1.5926517384587097e-05, "loss": 0.7886, "step": 4900 }, { "epoch": 0.333985205339682, "grad_norm": 0.249611958861351, "learning_rate": 1.590838394990171e-05, "loss": 0.7885, "step": 4910 }, { "epoch": 0.3346654196071763, "grad_norm": 0.24254435300827026, "learning_rate": 1.5890220618451397e-05, "loss": 0.769, "step": 4920 }, { "epoch": 0.33534563387467053, "grad_norm": 0.2485640048980713, "learning_rate": 1.587202748214368e-05, "loss": 0.7748, "step": 4930 }, { "epoch": 0.3360258481421648, "grad_norm": 0.24997614324092865, "learning_rate": 1.5853804633036875e-05, "loss": 0.7668, "step": 4940 }, { "epoch": 0.33670606240965906, "grad_norm": 0.24659042060375214, "learning_rate": 1.5835552163339648e-05, "loss": 0.7923, "step": 4950 }, { "epoch": 0.3373862766771533, "grad_norm": 0.2528908848762512, "learning_rate": 1.5817270165410566e-05, "loss": 0.7734, "step": 4960 }, { "epoch": 0.3380664909446476, "grad_norm": 0.24560825526714325, "learning_rate": 1.5798958731757588e-05, "loss": 0.7791, "step": 4970 }, { "epoch": 0.33874670521214184, "grad_norm": 0.2493152767419815, "learning_rate": 1.578061795503763e-05, "loss": 0.7606, "step": 4980 }, { "epoch": 0.3394269194796361, "grad_norm": 0.24539536237716675, "learning_rate": 1.5762247928056085e-05, "loss": 0.7937, "step": 4990 }, { "epoch": 0.34010713374713036, "grad_norm": 0.24469323456287384, "learning_rate": 1.5743848743766358e-05, "loss": 0.7973, "step": 5000 }, { "epoch": 0.3407873480146246, "grad_norm": 0.2577314078807831, "learning_rate": 1.5725420495269378e-05, "loss": 0.7773, "step": 5010 }, { "epoch": 0.3414675622821189, "grad_norm": 0.2408699095249176, "learning_rate": 1.5706963275813155e-05, "loss": 0.7908, "step": 5020 }, { "epoch": 0.34214777654961315, "grad_norm": 0.24674849212169647, "learning_rate": 1.5688477178792288e-05, "loss": 0.7625, "step": 5030 }, { "epoch": 0.3428279908171074, "grad_norm": 0.24679632484912872, "learning_rate": 1.5669962297747497e-05, "loss": 0.7744, "step": 5040 }, { "epoch": 0.34350820508460167, "grad_norm": 0.25358954071998596, "learning_rate": 1.5651418726365155e-05, "loss": 0.7795, "step": 5050 }, { "epoch": 0.34418841935209593, "grad_norm": 0.25233152508735657, "learning_rate": 1.56328465584768e-05, "loss": 0.7797, "step": 5060 }, { "epoch": 0.3448686336195902, "grad_norm": 0.2582693099975586, "learning_rate": 1.5614245888058687e-05, "loss": 0.7811, "step": 5070 }, { "epoch": 0.34554884788708445, "grad_norm": 0.25043052434921265, "learning_rate": 1.5595616809231277e-05, "loss": 0.7994, "step": 5080 }, { "epoch": 0.3462290621545787, "grad_norm": 0.2512742877006531, "learning_rate": 1.5576959416258795e-05, "loss": 0.7787, "step": 5090 }, { "epoch": 0.346909276422073, "grad_norm": 0.24891647696495056, "learning_rate": 1.555827380354872e-05, "loss": 0.7867, "step": 5100 }, { "epoch": 0.34758949068956724, "grad_norm": 0.2560541331768036, "learning_rate": 1.5539560065651342e-05, "loss": 0.7905, "step": 5110 }, { "epoch": 0.3482697049570615, "grad_norm": 0.24090375006198883, "learning_rate": 1.552081829725926e-05, "loss": 0.7897, "step": 5120 }, { "epoch": 0.34894991922455576, "grad_norm": 0.2456362247467041, "learning_rate": 1.5502048593206897e-05, "loss": 0.7639, "step": 5130 }, { "epoch": 0.34963013349205, "grad_norm": 0.25189900398254395, "learning_rate": 1.5483251048470055e-05, "loss": 0.7778, "step": 5140 }, { "epoch": 0.3503103477595443, "grad_norm": 0.252087265253067, "learning_rate": 1.546442575816539e-05, "loss": 0.787, "step": 5150 }, { "epoch": 0.35099056202703854, "grad_norm": 0.25998103618621826, "learning_rate": 1.5445572817549963e-05, "loss": 0.7628, "step": 5160 }, { "epoch": 0.3516707762945328, "grad_norm": 0.25459837913513184, "learning_rate": 1.5426692322020748e-05, "loss": 0.7859, "step": 5170 }, { "epoch": 0.352350990562027, "grad_norm": 0.2553059756755829, "learning_rate": 1.5407784367114143e-05, "loss": 0.7805, "step": 5180 }, { "epoch": 0.3530312048295213, "grad_norm": 0.2508421242237091, "learning_rate": 1.5388849048505496e-05, "loss": 0.7674, "step": 5190 }, { "epoch": 0.35371141909701553, "grad_norm": 0.2463318556547165, "learning_rate": 1.5369886462008617e-05, "loss": 0.782, "step": 5200 }, { "epoch": 0.3543916333645098, "grad_norm": 0.2445809543132782, "learning_rate": 1.5350896703575288e-05, "loss": 0.7723, "step": 5210 }, { "epoch": 0.35507184763200406, "grad_norm": 0.24156180024147034, "learning_rate": 1.533187986929479e-05, "loss": 0.7796, "step": 5220 }, { "epoch": 0.3557520618994983, "grad_norm": 0.2451319843530655, "learning_rate": 1.5312836055393402e-05, "loss": 0.7788, "step": 5230 }, { "epoch": 0.3564322761669926, "grad_norm": 0.25641563534736633, "learning_rate": 1.5293765358233927e-05, "loss": 0.7802, "step": 5240 }, { "epoch": 0.35711249043448684, "grad_norm": 0.2478734701871872, "learning_rate": 1.5274667874315195e-05, "loss": 0.7858, "step": 5250 }, { "epoch": 0.3577927047019811, "grad_norm": 0.24530655145645142, "learning_rate": 1.525554370027159e-05, "loss": 0.7742, "step": 5260 }, { "epoch": 0.35847291896947536, "grad_norm": 0.24920684099197388, "learning_rate": 1.5236392932872525e-05, "loss": 0.7684, "step": 5270 }, { "epoch": 0.3591531332369696, "grad_norm": 0.26737821102142334, "learning_rate": 1.5217215669022002e-05, "loss": 0.7722, "step": 5280 }, { "epoch": 0.3598333475044639, "grad_norm": 0.2467416673898697, "learning_rate": 1.5198012005758083e-05, "loss": 0.7755, "step": 5290 }, { "epoch": 0.36051356177195815, "grad_norm": 0.26333990693092346, "learning_rate": 1.5178782040252417e-05, "loss": 0.7973, "step": 5300 }, { "epoch": 0.3611937760394524, "grad_norm": 0.2541069984436035, "learning_rate": 1.5159525869809746e-05, "loss": 0.7802, "step": 5310 }, { "epoch": 0.36187399030694667, "grad_norm": 0.24673090875148773, "learning_rate": 1.5140243591867405e-05, "loss": 0.7775, "step": 5320 }, { "epoch": 0.36255420457444093, "grad_norm": 0.2515685260295868, "learning_rate": 1.5120935303994833e-05, "loss": 0.7836, "step": 5330 }, { "epoch": 0.3632344188419352, "grad_norm": 0.27212855219841003, "learning_rate": 1.510160110389309e-05, "loss": 0.7738, "step": 5340 }, { "epoch": 0.36391463310942945, "grad_norm": 0.25533565878868103, "learning_rate": 1.5082241089394348e-05, "loss": 0.7702, "step": 5350 }, { "epoch": 0.3645948473769237, "grad_norm": 0.24824172258377075, "learning_rate": 1.5062855358461397e-05, "loss": 0.7695, "step": 5360 }, { "epoch": 0.365275061644418, "grad_norm": 0.2359800785779953, "learning_rate": 1.5043444009187163e-05, "loss": 0.7744, "step": 5370 }, { "epoch": 0.36595527591191224, "grad_norm": 0.2501261234283447, "learning_rate": 1.5024007139794195e-05, "loss": 0.7688, "step": 5380 }, { "epoch": 0.3666354901794065, "grad_norm": 0.26191291213035583, "learning_rate": 1.5004544848634177e-05, "loss": 0.7882, "step": 5390 }, { "epoch": 0.36731570444690076, "grad_norm": 0.2488580048084259, "learning_rate": 1.498505723418743e-05, "loss": 0.7738, "step": 5400 }, { "epoch": 0.367995918714395, "grad_norm": 0.26761361956596375, "learning_rate": 1.4965544395062412e-05, "loss": 0.7656, "step": 5410 }, { "epoch": 0.3686761329818893, "grad_norm": 0.24776126444339752, "learning_rate": 1.4946006429995219e-05, "loss": 0.7794, "step": 5420 }, { "epoch": 0.36935634724938354, "grad_norm": 0.2511827051639557, "learning_rate": 1.4926443437849087e-05, "loss": 0.7682, "step": 5430 }, { "epoch": 0.3700365615168778, "grad_norm": 0.25186964869499207, "learning_rate": 1.4906855517613885e-05, "loss": 0.7822, "step": 5440 }, { "epoch": 0.37071677578437207, "grad_norm": 0.25966915488243103, "learning_rate": 1.4887242768405627e-05, "loss": 0.7831, "step": 5450 }, { "epoch": 0.3713969900518663, "grad_norm": 0.2607783377170563, "learning_rate": 1.4867605289465958e-05, "loss": 0.7673, "step": 5460 }, { "epoch": 0.3720772043193606, "grad_norm": 0.24937234818935394, "learning_rate": 1.4847943180161661e-05, "loss": 0.7774, "step": 5470 }, { "epoch": 0.37275741858685485, "grad_norm": 0.2692229151725769, "learning_rate": 1.4828256539984146e-05, "loss": 0.759, "step": 5480 }, { "epoch": 0.3734376328543491, "grad_norm": 0.2526799142360687, "learning_rate": 1.480854546854895e-05, "loss": 0.7817, "step": 5490 }, { "epoch": 0.37411784712184337, "grad_norm": 0.25847598910331726, "learning_rate": 1.4788810065595238e-05, "loss": 0.7826, "step": 5500 }, { "epoch": 0.37479806138933763, "grad_norm": 0.27168703079223633, "learning_rate": 1.4769050430985292e-05, "loss": 0.7792, "step": 5510 }, { "epoch": 0.3754782756568319, "grad_norm": 0.25303542613983154, "learning_rate": 1.474926666470401e-05, "loss": 0.7704, "step": 5520 }, { "epoch": 0.37615848992432616, "grad_norm": 0.2534182071685791, "learning_rate": 1.4729458866858389e-05, "loss": 0.7783, "step": 5530 }, { "epoch": 0.3768387041918204, "grad_norm": 0.2554495334625244, "learning_rate": 1.4709627137677038e-05, "loss": 0.7737, "step": 5540 }, { "epoch": 0.3775189184593147, "grad_norm": 0.24515779316425323, "learning_rate": 1.468977157750965e-05, "loss": 0.7743, "step": 5550 }, { "epoch": 0.37819913272680894, "grad_norm": 0.26140493154525757, "learning_rate": 1.4669892286826516e-05, "loss": 0.7818, "step": 5560 }, { "epoch": 0.3788793469943032, "grad_norm": 0.30516576766967773, "learning_rate": 1.4649989366217993e-05, "loss": 0.7846, "step": 5570 }, { "epoch": 0.37955956126179746, "grad_norm": 0.25299209356307983, "learning_rate": 1.4630062916394014e-05, "loss": 0.7743, "step": 5580 }, { "epoch": 0.3802397755292917, "grad_norm": 0.26081040501594543, "learning_rate": 1.4610113038183569e-05, "loss": 0.784, "step": 5590 }, { "epoch": 0.380919989796786, "grad_norm": 0.2585929036140442, "learning_rate": 1.4590139832534194e-05, "loss": 0.7749, "step": 5600 }, { "epoch": 0.38160020406428025, "grad_norm": 0.2520377039909363, "learning_rate": 1.457014340051147e-05, "loss": 0.7669, "step": 5610 }, { "epoch": 0.3822804183317745, "grad_norm": 0.25388965010643005, "learning_rate": 1.4550123843298499e-05, "loss": 0.7754, "step": 5620 }, { "epoch": 0.38296063259926877, "grad_norm": 0.24941669404506683, "learning_rate": 1.45300812621954e-05, "loss": 0.772, "step": 5630 }, { "epoch": 0.38364084686676303, "grad_norm": 0.24526414275169373, "learning_rate": 1.4510015758618794e-05, "loss": 0.7767, "step": 5640 }, { "epoch": 0.3843210611342573, "grad_norm": 0.2574145495891571, "learning_rate": 1.448992743410129e-05, "loss": 0.7638, "step": 5650 }, { "epoch": 0.38500127540175155, "grad_norm": 0.25559088587760925, "learning_rate": 1.4469816390290973e-05, "loss": 0.7887, "step": 5660 }, { "epoch": 0.3856814896692458, "grad_norm": 0.244601309299469, "learning_rate": 1.4449682728950895e-05, "loss": 0.7873, "step": 5670 }, { "epoch": 0.3863617039367401, "grad_norm": 0.24688373506069183, "learning_rate": 1.4429526551958541e-05, "loss": 0.7796, "step": 5680 }, { "epoch": 0.38704191820423434, "grad_norm": 0.25341495871543884, "learning_rate": 1.4409347961305339e-05, "loss": 0.7797, "step": 5690 }, { "epoch": 0.3877221324717286, "grad_norm": 0.26516810059547424, "learning_rate": 1.4389147059096121e-05, "loss": 0.7776, "step": 5700 }, { "epoch": 0.38840234673922286, "grad_norm": 0.2658517062664032, "learning_rate": 1.4368923947548626e-05, "loss": 0.7665, "step": 5710 }, { "epoch": 0.3890825610067171, "grad_norm": 0.25798964500427246, "learning_rate": 1.4348678728992967e-05, "loss": 0.7956, "step": 5720 }, { "epoch": 0.3897627752742114, "grad_norm": 0.2526843845844269, "learning_rate": 1.4328411505871126e-05, "loss": 0.7783, "step": 5730 }, { "epoch": 0.39044298954170564, "grad_norm": 0.25203028321266174, "learning_rate": 1.4308122380736419e-05, "loss": 0.7608, "step": 5740 }, { "epoch": 0.3911232038091999, "grad_norm": 0.24806834757328033, "learning_rate": 1.4287811456253e-05, "loss": 0.7802, "step": 5750 }, { "epoch": 0.39180341807669417, "grad_norm": 0.24196073412895203, "learning_rate": 1.4267478835195321e-05, "loss": 0.772, "step": 5760 }, { "epoch": 0.3924836323441884, "grad_norm": 0.25286272168159485, "learning_rate": 1.424712462044762e-05, "loss": 0.7623, "step": 5770 }, { "epoch": 0.3931638466116827, "grad_norm": 0.25081154704093933, "learning_rate": 1.422674891500341e-05, "loss": 0.7876, "step": 5780 }, { "epoch": 0.39384406087917695, "grad_norm": 0.2481163740158081, "learning_rate": 1.420635182196493e-05, "loss": 0.7722, "step": 5790 }, { "epoch": 0.3945242751466712, "grad_norm": 0.27228838205337524, "learning_rate": 1.4185933444542657e-05, "loss": 0.7875, "step": 5800 }, { "epoch": 0.39520448941416547, "grad_norm": 0.24268393218517303, "learning_rate": 1.4165493886054763e-05, "loss": 0.777, "step": 5810 }, { "epoch": 0.39588470368165973, "grad_norm": 0.25057727098464966, "learning_rate": 1.4145033249926598e-05, "loss": 0.7737, "step": 5820 }, { "epoch": 0.396564917949154, "grad_norm": 0.2459910362958908, "learning_rate": 1.4124551639690166e-05, "loss": 0.7748, "step": 5830 }, { "epoch": 0.39724513221664826, "grad_norm": 0.25080984830856323, "learning_rate": 1.4104049158983596e-05, "loss": 0.7749, "step": 5840 }, { "epoch": 0.3979253464841425, "grad_norm": 0.254576712846756, "learning_rate": 1.4083525911550635e-05, "loss": 0.7779, "step": 5850 }, { "epoch": 0.3986055607516368, "grad_norm": 0.24936212599277496, "learning_rate": 1.4062982001240097e-05, "loss": 0.7789, "step": 5860 }, { "epoch": 0.39928577501913104, "grad_norm": 0.244483083486557, "learning_rate": 1.4042417532005361e-05, "loss": 0.7636, "step": 5870 }, { "epoch": 0.3999659892866253, "grad_norm": 0.25264477729797363, "learning_rate": 1.4021832607903831e-05, "loss": 0.7868, "step": 5880 }, { "epoch": 0.40064620355411956, "grad_norm": 0.26535698771476746, "learning_rate": 1.4001227333096416e-05, "loss": 0.7796, "step": 5890 }, { "epoch": 0.4013264178216138, "grad_norm": 0.2568298876285553, "learning_rate": 1.3980601811847001e-05, "loss": 0.7648, "step": 5900 }, { "epoch": 0.4020066320891081, "grad_norm": 0.2487412691116333, "learning_rate": 1.3959956148521916e-05, "loss": 0.7641, "step": 5910 }, { "epoch": 0.40268684635660235, "grad_norm": 0.24890504777431488, "learning_rate": 1.393929044758941e-05, "loss": 0.7741, "step": 5920 }, { "epoch": 0.4033670606240966, "grad_norm": 0.251572847366333, "learning_rate": 1.3918604813619132e-05, "loss": 0.796, "step": 5930 }, { "epoch": 0.40404727489159087, "grad_norm": 0.23951993882656097, "learning_rate": 1.3897899351281585e-05, "loss": 0.7578, "step": 5940 }, { "epoch": 0.40472748915908513, "grad_norm": 0.2541947066783905, "learning_rate": 1.3877174165347606e-05, "loss": 0.7668, "step": 5950 }, { "epoch": 0.4054077034265794, "grad_norm": 0.26087838411331177, "learning_rate": 1.385642936068784e-05, "loss": 0.7788, "step": 5960 }, { "epoch": 0.40608791769407365, "grad_norm": 0.24299927055835724, "learning_rate": 1.3835665042272195e-05, "loss": 0.7744, "step": 5970 }, { "epoch": 0.4067681319615679, "grad_norm": 0.2495705932378769, "learning_rate": 1.3814881315169328e-05, "loss": 0.767, "step": 5980 }, { "epoch": 0.4074483462290622, "grad_norm": 0.24570466578006744, "learning_rate": 1.37940782845461e-05, "loss": 0.7739, "step": 5990 }, { "epoch": 0.40812856049655644, "grad_norm": 0.24526531994342804, "learning_rate": 1.3773256055667053e-05, "loss": 0.7703, "step": 6000 }, { "epoch": 0.4088087747640507, "grad_norm": 0.2453545182943344, "learning_rate": 1.3752414733893866e-05, "loss": 0.7771, "step": 6010 }, { "epoch": 0.40948898903154496, "grad_norm": 0.26506248116493225, "learning_rate": 1.3731554424684839e-05, "loss": 0.7818, "step": 6020 }, { "epoch": 0.4101692032990392, "grad_norm": 0.2656206786632538, "learning_rate": 1.3710675233594342e-05, "loss": 0.7724, "step": 6030 }, { "epoch": 0.4108494175665335, "grad_norm": 0.24015671014785767, "learning_rate": 1.3689777266272292e-05, "loss": 0.7647, "step": 6040 }, { "epoch": 0.41152963183402774, "grad_norm": 0.24293836951255798, "learning_rate": 1.3668860628463611e-05, "loss": 0.7673, "step": 6050 }, { "epoch": 0.412209846101522, "grad_norm": 0.2461855262517929, "learning_rate": 1.3647925426007703e-05, "loss": 0.7577, "step": 6060 }, { "epoch": 0.41289006036901627, "grad_norm": 0.24776321649551392, "learning_rate": 1.3626971764837903e-05, "loss": 0.7826, "step": 6070 }, { "epoch": 0.4135702746365105, "grad_norm": 0.23962141573429108, "learning_rate": 1.3605999750980943e-05, "loss": 0.7703, "step": 6080 }, { "epoch": 0.4142504889040048, "grad_norm": 0.2501792013645172, "learning_rate": 1.3585009490556435e-05, "loss": 0.7685, "step": 6090 }, { "epoch": 0.41493070317149905, "grad_norm": 0.25442349910736084, "learning_rate": 1.3564001089776313e-05, "loss": 0.7585, "step": 6100 }, { "epoch": 0.4156109174389933, "grad_norm": 0.2500801682472229, "learning_rate": 1.3542974654944297e-05, "loss": 0.7649, "step": 6110 }, { "epoch": 0.4162911317064875, "grad_norm": 0.25212499499320984, "learning_rate": 1.3521930292455363e-05, "loss": 0.7607, "step": 6120 }, { "epoch": 0.4169713459739818, "grad_norm": 0.24636310338974, "learning_rate": 1.3500868108795213e-05, "loss": 0.7744, "step": 6130 }, { "epoch": 0.41765156024147604, "grad_norm": 0.24590402841567993, "learning_rate": 1.3479788210539705e-05, "loss": 0.7706, "step": 6140 }, { "epoch": 0.4183317745089703, "grad_norm": 0.2557471990585327, "learning_rate": 1.3458690704354353e-05, "loss": 0.7683, "step": 6150 }, { "epoch": 0.41901198877646456, "grad_norm": 0.250740110874176, "learning_rate": 1.3437575696993756e-05, "loss": 0.7672, "step": 6160 }, { "epoch": 0.4196922030439588, "grad_norm": 0.23877675831317902, "learning_rate": 1.3416443295301076e-05, "loss": 0.7613, "step": 6170 }, { "epoch": 0.4203724173114531, "grad_norm": 0.2432548552751541, "learning_rate": 1.3395293606207487e-05, "loss": 0.7752, "step": 6180 }, { "epoch": 0.42105263157894735, "grad_norm": 0.24699924886226654, "learning_rate": 1.3374126736731643e-05, "loss": 0.7823, "step": 6190 }, { "epoch": 0.4217328458464416, "grad_norm": 0.24633049964904785, "learning_rate": 1.3352942793979127e-05, "loss": 0.774, "step": 6200 }, { "epoch": 0.42241306011393587, "grad_norm": 0.25033462047576904, "learning_rate": 1.3331741885141915e-05, "loss": 0.7626, "step": 6210 }, { "epoch": 0.42309327438143013, "grad_norm": 0.2341737002134323, "learning_rate": 1.3310524117497839e-05, "loss": 0.7777, "step": 6220 }, { "epoch": 0.4237734886489244, "grad_norm": 0.2521795928478241, "learning_rate": 1.3289289598410024e-05, "loss": 0.7727, "step": 6230 }, { "epoch": 0.42445370291641865, "grad_norm": 0.24901489913463593, "learning_rate": 1.3268038435326374e-05, "loss": 0.7881, "step": 6240 }, { "epoch": 0.4251339171839129, "grad_norm": 0.2500702142715454, "learning_rate": 1.3246770735778998e-05, "loss": 0.7692, "step": 6250 }, { "epoch": 0.4258141314514072, "grad_norm": 0.24418380856513977, "learning_rate": 1.3225486607383693e-05, "loss": 0.7714, "step": 6260 }, { "epoch": 0.42649434571890144, "grad_norm": 0.24833431839942932, "learning_rate": 1.3204186157839379e-05, "loss": 0.7714, "step": 6270 }, { "epoch": 0.4271745599863957, "grad_norm": 0.2640336751937866, "learning_rate": 1.3182869494927562e-05, "loss": 0.7642, "step": 6280 }, { "epoch": 0.42785477425388996, "grad_norm": 0.2517968416213989, "learning_rate": 1.3161536726511791e-05, "loss": 0.782, "step": 6290 }, { "epoch": 0.4285349885213842, "grad_norm": 0.254301518201828, "learning_rate": 1.3140187960537113e-05, "loss": 0.7703, "step": 6300 }, { "epoch": 0.4292152027888785, "grad_norm": 0.24380703270435333, "learning_rate": 1.311882330502952e-05, "loss": 0.7648, "step": 6310 }, { "epoch": 0.42989541705637274, "grad_norm": 0.2415427565574646, "learning_rate": 1.3097442868095403e-05, "loss": 0.7791, "step": 6320 }, { "epoch": 0.430575631323867, "grad_norm": 0.24330158531665802, "learning_rate": 1.3076046757921011e-05, "loss": 0.7698, "step": 6330 }, { "epoch": 0.43125584559136126, "grad_norm": 0.25078320503234863, "learning_rate": 1.30546350827719e-05, "loss": 0.7741, "step": 6340 }, { "epoch": 0.4319360598588555, "grad_norm": 0.2747834026813507, "learning_rate": 1.303320795099239e-05, "loss": 0.7742, "step": 6350 }, { "epoch": 0.4326162741263498, "grad_norm": 0.24714337289333344, "learning_rate": 1.3011765471005004e-05, "loss": 0.7669, "step": 6360 }, { "epoch": 0.43329648839384405, "grad_norm": 0.25236451625823975, "learning_rate": 1.2990307751309936e-05, "loss": 0.7818, "step": 6370 }, { "epoch": 0.4339767026613383, "grad_norm": 0.26118069887161255, "learning_rate": 1.2968834900484489e-05, "loss": 0.7619, "step": 6380 }, { "epoch": 0.43465691692883257, "grad_norm": 0.25219205021858215, "learning_rate": 1.2947347027182531e-05, "loss": 0.7694, "step": 6390 }, { "epoch": 0.43533713119632683, "grad_norm": 0.24675573408603668, "learning_rate": 1.2925844240133945e-05, "loss": 0.7801, "step": 6400 }, { "epoch": 0.4360173454638211, "grad_norm": 0.24943795800209045, "learning_rate": 1.290432664814408e-05, "loss": 0.7956, "step": 6410 }, { "epoch": 0.43669755973131535, "grad_norm": 0.25274989008903503, "learning_rate": 1.2882794360093196e-05, "loss": 0.7821, "step": 6420 }, { "epoch": 0.4373777739988096, "grad_norm": 0.24581989645957947, "learning_rate": 1.2861247484935917e-05, "loss": 0.7698, "step": 6430 }, { "epoch": 0.4380579882663039, "grad_norm": 0.24845002591609955, "learning_rate": 1.283968613170068e-05, "loss": 0.7598, "step": 6440 }, { "epoch": 0.43873820253379814, "grad_norm": 0.24644219875335693, "learning_rate": 1.2818110409489184e-05, "loss": 0.766, "step": 6450 }, { "epoch": 0.4394184168012924, "grad_norm": 0.26946285367012024, "learning_rate": 1.2796520427475827e-05, "loss": 0.7848, "step": 6460 }, { "epoch": 0.44009863106878666, "grad_norm": 0.25460585951805115, "learning_rate": 1.2774916294907171e-05, "loss": 0.7803, "step": 6470 }, { "epoch": 0.4407788453362809, "grad_norm": 0.24377070367336273, "learning_rate": 1.2753298121101378e-05, "loss": 0.7768, "step": 6480 }, { "epoch": 0.4414590596037752, "grad_norm": 0.23929229378700256, "learning_rate": 1.2731666015447659e-05, "loss": 0.7773, "step": 6490 }, { "epoch": 0.44213927387126944, "grad_norm": 0.25252291560173035, "learning_rate": 1.2710020087405721e-05, "loss": 0.7825, "step": 6500 }, { "epoch": 0.4428194881387637, "grad_norm": 0.2470589131116867, "learning_rate": 1.2688360446505213e-05, "loss": 0.7599, "step": 6510 }, { "epoch": 0.44349970240625797, "grad_norm": 0.24808578193187714, "learning_rate": 1.2666687202345171e-05, "loss": 0.7761, "step": 6520 }, { "epoch": 0.44417991667375223, "grad_norm": 0.26062536239624023, "learning_rate": 1.264500046459347e-05, "loss": 0.7659, "step": 6530 }, { "epoch": 0.4448601309412465, "grad_norm": 0.2558269500732422, "learning_rate": 1.2623300342986256e-05, "loss": 0.77, "step": 6540 }, { "epoch": 0.44554034520874075, "grad_norm": 0.24331645667552948, "learning_rate": 1.26015869473274e-05, "loss": 0.7849, "step": 6550 }, { "epoch": 0.446220559476235, "grad_norm": 0.2490650862455368, "learning_rate": 1.2579860387487941e-05, "loss": 0.7692, "step": 6560 }, { "epoch": 0.4469007737437293, "grad_norm": 0.252430260181427, "learning_rate": 1.2558120773405536e-05, "loss": 0.7677, "step": 6570 }, { "epoch": 0.44758098801122354, "grad_norm": 0.24718795716762543, "learning_rate": 1.2536368215083883e-05, "loss": 0.7708, "step": 6580 }, { "epoch": 0.4482612022787178, "grad_norm": 0.25127896666526794, "learning_rate": 1.2514602822592189e-05, "loss": 0.7753, "step": 6590 }, { "epoch": 0.44894141654621206, "grad_norm": 0.2372049242258072, "learning_rate": 1.2492824706064602e-05, "loss": 0.7749, "step": 6600 }, { "epoch": 0.4496216308137063, "grad_norm": 0.23758196830749512, "learning_rate": 1.247103397569965e-05, "loss": 0.782, "step": 6610 }, { "epoch": 0.4503018450812006, "grad_norm": 0.2627807855606079, "learning_rate": 1.2449230741759694e-05, "loss": 0.7667, "step": 6620 }, { "epoch": 0.45098205934869484, "grad_norm": 0.2461169809103012, "learning_rate": 1.2427415114570361e-05, "loss": 0.761, "step": 6630 }, { "epoch": 0.4516622736161891, "grad_norm": 0.2529158890247345, "learning_rate": 1.2405587204519984e-05, "loss": 0.7693, "step": 6640 }, { "epoch": 0.45234248788368336, "grad_norm": 0.2508241832256317, "learning_rate": 1.2383747122059054e-05, "loss": 0.7662, "step": 6650 }, { "epoch": 0.4530227021511776, "grad_norm": 0.2500368058681488, "learning_rate": 1.2361894977699651e-05, "loss": 0.7759, "step": 6660 }, { "epoch": 0.4537029164186719, "grad_norm": 0.24708276987075806, "learning_rate": 1.2340030882014892e-05, "loss": 0.7745, "step": 6670 }, { "epoch": 0.45438313068616615, "grad_norm": 0.25018182396888733, "learning_rate": 1.2318154945638368e-05, "loss": 0.7888, "step": 6680 }, { "epoch": 0.4550633449536604, "grad_norm": 0.24344618618488312, "learning_rate": 1.2296267279263586e-05, "loss": 0.7786, "step": 6690 }, { "epoch": 0.45574355922115467, "grad_norm": 0.24326354265213013, "learning_rate": 1.2274367993643399e-05, "loss": 0.7673, "step": 6700 }, { "epoch": 0.45642377348864893, "grad_norm": 0.24483905732631683, "learning_rate": 1.2252457199589464e-05, "loss": 0.7721, "step": 6710 }, { "epoch": 0.4571039877561432, "grad_norm": 0.2535827159881592, "learning_rate": 1.223053500797167e-05, "loss": 0.7679, "step": 6720 }, { "epoch": 0.45778420202363745, "grad_norm": 0.2524265646934509, "learning_rate": 1.2208601529717573e-05, "loss": 0.7804, "step": 6730 }, { "epoch": 0.4584644162911317, "grad_norm": 0.2596147060394287, "learning_rate": 1.2186656875811841e-05, "loss": 0.7721, "step": 6740 }, { "epoch": 0.459144630558626, "grad_norm": 0.2450697124004364, "learning_rate": 1.21647011572957e-05, "loss": 0.7761, "step": 6750 }, { "epoch": 0.45982484482612024, "grad_norm": 0.25916561484336853, "learning_rate": 1.214273448526635e-05, "loss": 0.7834, "step": 6760 }, { "epoch": 0.4605050590936145, "grad_norm": 0.2503756284713745, "learning_rate": 1.2120756970876429e-05, "loss": 0.7829, "step": 6770 }, { "epoch": 0.46118527336110876, "grad_norm": 0.24557699263095856, "learning_rate": 1.209876872533343e-05, "loss": 0.7839, "step": 6780 }, { "epoch": 0.461865487628603, "grad_norm": 0.2489149421453476, "learning_rate": 1.2076769859899149e-05, "loss": 0.7775, "step": 6790 }, { "epoch": 0.4625457018960973, "grad_norm": 0.25012078881263733, "learning_rate": 1.2054760485889116e-05, "loss": 0.7724, "step": 6800 }, { "epoch": 0.46322591616359154, "grad_norm": 0.2612060308456421, "learning_rate": 1.2032740714672044e-05, "loss": 0.7624, "step": 6810 }, { "epoch": 0.4639061304310858, "grad_norm": 0.24063514173030853, "learning_rate": 1.2010710657669246e-05, "loss": 0.7899, "step": 6820 }, { "epoch": 0.46458634469858007, "grad_norm": 0.25464415550231934, "learning_rate": 1.1988670426354087e-05, "loss": 0.7932, "step": 6830 }, { "epoch": 0.46526655896607433, "grad_norm": 0.2486640214920044, "learning_rate": 1.1966620132251414e-05, "loss": 0.7872, "step": 6840 }, { "epoch": 0.4659467732335686, "grad_norm": 0.2476149946451187, "learning_rate": 1.194455988693699e-05, "loss": 0.7583, "step": 6850 }, { "epoch": 0.46662698750106285, "grad_norm": 0.2592043876647949, "learning_rate": 1.192248980203694e-05, "loss": 0.7682, "step": 6860 }, { "epoch": 0.4673072017685571, "grad_norm": 0.2530962824821472, "learning_rate": 1.1900409989227167e-05, "loss": 0.7763, "step": 6870 }, { "epoch": 0.4679874160360514, "grad_norm": 0.24381905794143677, "learning_rate": 1.1878320560232801e-05, "loss": 0.7773, "step": 6880 }, { "epoch": 0.46866763030354563, "grad_norm": 0.24828781187534332, "learning_rate": 1.1856221626827635e-05, "loss": 0.7681, "step": 6890 }, { "epoch": 0.4693478445710399, "grad_norm": 0.25107139348983765, "learning_rate": 1.183411330083355e-05, "loss": 0.7798, "step": 6900 }, { "epoch": 0.47002805883853416, "grad_norm": 0.2627694308757782, "learning_rate": 1.1811995694119956e-05, "loss": 0.7785, "step": 6910 }, { "epoch": 0.4707082731060284, "grad_norm": 0.34391316771507263, "learning_rate": 1.1789868918603223e-05, "loss": 0.7668, "step": 6920 }, { "epoch": 0.4713884873735227, "grad_norm": 0.24955721199512482, "learning_rate": 1.1767733086246116e-05, "loss": 0.7582, "step": 6930 }, { "epoch": 0.47206870164101694, "grad_norm": 0.26111745834350586, "learning_rate": 1.1745588309057232e-05, "loss": 0.7702, "step": 6940 }, { "epoch": 0.4727489159085112, "grad_norm": 0.2527529299259186, "learning_rate": 1.1723434699090423e-05, "loss": 0.7683, "step": 6950 }, { "epoch": 0.47342913017600546, "grad_norm": 0.2523750960826874, "learning_rate": 1.1701272368444239e-05, "loss": 0.7865, "step": 6960 }, { "epoch": 0.4741093444434997, "grad_norm": 0.24606585502624512, "learning_rate": 1.1679101429261357e-05, "loss": 0.7718, "step": 6970 }, { "epoch": 0.474789558710994, "grad_norm": 0.24632257223129272, "learning_rate": 1.1656921993728013e-05, "loss": 0.759, "step": 6980 }, { "epoch": 0.47546977297848825, "grad_norm": 0.2509964406490326, "learning_rate": 1.1634734174073431e-05, "loss": 0.7706, "step": 6990 }, { "epoch": 0.4761499872459825, "grad_norm": 0.24110184609889984, "learning_rate": 1.161253808256927e-05, "loss": 0.7766, "step": 7000 }, { "epoch": 0.47683020151347677, "grad_norm": 0.264956533908844, "learning_rate": 1.1590333831529034e-05, "loss": 0.7798, "step": 7010 }, { "epoch": 0.47751041578097103, "grad_norm": 0.2527983486652374, "learning_rate": 1.1568121533307522e-05, "loss": 0.7744, "step": 7020 }, { "epoch": 0.4781906300484653, "grad_norm": 0.24862922728061676, "learning_rate": 1.1545901300300244e-05, "loss": 0.766, "step": 7030 }, { "epoch": 0.47887084431595955, "grad_norm": 0.25299084186553955, "learning_rate": 1.152367324494287e-05, "loss": 0.7594, "step": 7040 }, { "epoch": 0.4795510585834538, "grad_norm": 0.24184784293174744, "learning_rate": 1.1501437479710646e-05, "loss": 0.7669, "step": 7050 }, { "epoch": 0.480231272850948, "grad_norm": 0.25336650013923645, "learning_rate": 1.1479194117117833e-05, "loss": 0.7811, "step": 7060 }, { "epoch": 0.4809114871184423, "grad_norm": 0.25547850131988525, "learning_rate": 1.1456943269717133e-05, "loss": 0.7631, "step": 7070 }, { "epoch": 0.48159170138593654, "grad_norm": 0.24883776903152466, "learning_rate": 1.1434685050099122e-05, "loss": 0.7724, "step": 7080 }, { "epoch": 0.4822719156534308, "grad_norm": 0.2447323352098465, "learning_rate": 1.141241957089168e-05, "loss": 0.7641, "step": 7090 }, { "epoch": 0.48295212992092507, "grad_norm": 0.25550904870033264, "learning_rate": 1.1390146944759421e-05, "loss": 0.7855, "step": 7100 }, { "epoch": 0.4836323441884193, "grad_norm": 0.26494309306144714, "learning_rate": 1.1367867284403123e-05, "loss": 0.7702, "step": 7110 }, { "epoch": 0.4843125584559136, "grad_norm": 0.2557946443557739, "learning_rate": 1.1345580702559158e-05, "loss": 0.7699, "step": 7120 }, { "epoch": 0.48499277272340785, "grad_norm": 0.25180885195732117, "learning_rate": 1.1323287311998921e-05, "loss": 0.7787, "step": 7130 }, { "epoch": 0.4856729869909021, "grad_norm": 0.24362744390964508, "learning_rate": 1.1300987225528257e-05, "loss": 0.7743, "step": 7140 }, { "epoch": 0.4863532012583964, "grad_norm": 0.2375001758337021, "learning_rate": 1.1278680555986898e-05, "loss": 0.7703, "step": 7150 }, { "epoch": 0.48703341552589063, "grad_norm": 0.2449880689382553, "learning_rate": 1.1256367416247882e-05, "loss": 0.766, "step": 7160 }, { "epoch": 0.4877136297933849, "grad_norm": 0.25702130794525146, "learning_rate": 1.1234047919216986e-05, "loss": 0.7898, "step": 7170 }, { "epoch": 0.48839384406087916, "grad_norm": 0.254113107919693, "learning_rate": 1.1211722177832163e-05, "loss": 0.7806, "step": 7180 }, { "epoch": 0.4890740583283734, "grad_norm": 0.253002405166626, "learning_rate": 1.118939030506295e-05, "loss": 0.7797, "step": 7190 }, { "epoch": 0.4897542725958677, "grad_norm": 0.24663403630256653, "learning_rate": 1.1167052413909925e-05, "loss": 0.764, "step": 7200 }, { "epoch": 0.49043448686336194, "grad_norm": 0.2504352331161499, "learning_rate": 1.1144708617404101e-05, "loss": 0.7721, "step": 7210 }, { "epoch": 0.4911147011308562, "grad_norm": 0.25107845664024353, "learning_rate": 1.1122359028606391e-05, "loss": 0.7737, "step": 7220 }, { "epoch": 0.49179491539835046, "grad_norm": 0.2497549057006836, "learning_rate": 1.1100003760607001e-05, "loss": 0.7644, "step": 7230 }, { "epoch": 0.4924751296658447, "grad_norm": 0.2447914481163025, "learning_rate": 1.1077642926524887e-05, "loss": 0.7779, "step": 7240 }, { "epoch": 0.493155343933339, "grad_norm": 0.25073736906051636, "learning_rate": 1.1055276639507165e-05, "loss": 0.7662, "step": 7250 }, { "epoch": 0.49383555820083325, "grad_norm": 0.24794918298721313, "learning_rate": 1.1032905012728536e-05, "loss": 0.7868, "step": 7260 }, { "epoch": 0.4945157724683275, "grad_norm": 0.2526129484176636, "learning_rate": 1.1010528159390732e-05, "loss": 0.7812, "step": 7270 }, { "epoch": 0.49519598673582177, "grad_norm": 0.24096263945102692, "learning_rate": 1.0988146192721927e-05, "loss": 0.7626, "step": 7280 }, { "epoch": 0.49587620100331603, "grad_norm": 0.25271764397621155, "learning_rate": 1.0965759225976165e-05, "loss": 0.7688, "step": 7290 }, { "epoch": 0.4965564152708103, "grad_norm": 0.24893783032894135, "learning_rate": 1.0943367372432796e-05, "loss": 0.7684, "step": 7300 }, { "epoch": 0.49723662953830455, "grad_norm": 0.24898919463157654, "learning_rate": 1.0920970745395897e-05, "loss": 0.7688, "step": 7310 }, { "epoch": 0.4979168438057988, "grad_norm": 0.24341994524002075, "learning_rate": 1.0898569458193695e-05, "loss": 0.7841, "step": 7320 }, { "epoch": 0.4985970580732931, "grad_norm": 0.25512826442718506, "learning_rate": 1.0876163624178004e-05, "loss": 0.7722, "step": 7330 }, { "epoch": 0.49927727234078734, "grad_norm": 0.2560203969478607, "learning_rate": 1.0853753356723637e-05, "loss": 0.7592, "step": 7340 }, { "epoch": 0.4999574866082816, "grad_norm": 0.25013265013694763, "learning_rate": 1.0831338769227848e-05, "loss": 0.7797, "step": 7350 }, { "epoch": 0.5006377008757759, "grad_norm": 0.24355962872505188, "learning_rate": 1.0808919975109746e-05, "loss": 0.7728, "step": 7360 }, { "epoch": 0.5013179151432702, "grad_norm": 0.2452683001756668, "learning_rate": 1.0786497087809731e-05, "loss": 0.7852, "step": 7370 }, { "epoch": 0.5019981294107644, "grad_norm": 0.2576746642589569, "learning_rate": 1.0764070220788906e-05, "loss": 0.7715, "step": 7380 }, { "epoch": 0.5026783436782587, "grad_norm": 0.25701427459716797, "learning_rate": 1.074163948752852e-05, "loss": 0.7653, "step": 7390 }, { "epoch": 0.5033585579457529, "grad_norm": 0.2555880844593048, "learning_rate": 1.0719205001529382e-05, "loss": 0.7741, "step": 7400 }, { "epoch": 0.5040387722132472, "grad_norm": 0.24719035625457764, "learning_rate": 1.069676687631129e-05, "loss": 0.7655, "step": 7410 }, { "epoch": 0.5047189864807414, "grad_norm": 0.24667803943157196, "learning_rate": 1.0674325225412456e-05, "loss": 0.7597, "step": 7420 }, { "epoch": 0.5053992007482357, "grad_norm": 0.2403658777475357, "learning_rate": 1.065188016238893e-05, "loss": 0.7848, "step": 7430 }, { "epoch": 0.50607941501573, "grad_norm": 0.24025067687034607, "learning_rate": 1.062943180081404e-05, "loss": 0.7586, "step": 7440 }, { "epoch": 0.5067596292832243, "grad_norm": 0.25513070821762085, "learning_rate": 1.060698025427779e-05, "loss": 0.7797, "step": 7450 }, { "epoch": 0.5074398435507185, "grad_norm": 0.2516796290874481, "learning_rate": 1.0584525636386302e-05, "loss": 0.7764, "step": 7460 }, { "epoch": 0.5081200578182128, "grad_norm": 0.2510603666305542, "learning_rate": 1.0562068060761249e-05, "loss": 0.7775, "step": 7470 }, { "epoch": 0.508800272085707, "grad_norm": 0.24026992917060852, "learning_rate": 1.0539607641039258e-05, "loss": 0.7642, "step": 7480 }, { "epoch": 0.5094804863532013, "grad_norm": 0.24698439240455627, "learning_rate": 1.0517144490871358e-05, "loss": 0.7826, "step": 7490 }, { "epoch": 0.5101607006206955, "grad_norm": 0.2510314881801605, "learning_rate": 1.049467872392239e-05, "loss": 0.7712, "step": 7500 }, { "epoch": 0.5108409148881898, "grad_norm": 0.2500401437282562, "learning_rate": 1.0472210453870433e-05, "loss": 0.7804, "step": 7510 }, { "epoch": 0.511521129155684, "grad_norm": 0.25299274921417236, "learning_rate": 1.0449739794406236e-05, "loss": 0.7746, "step": 7520 }, { "epoch": 0.5122013434231782, "grad_norm": 0.2486591637134552, "learning_rate": 1.0427266859232634e-05, "loss": 0.7668, "step": 7530 }, { "epoch": 0.5128815576906726, "grad_norm": 0.2587043344974518, "learning_rate": 1.0404791762063983e-05, "loss": 0.7554, "step": 7540 }, { "epoch": 0.5135617719581668, "grad_norm": 0.2467149943113327, "learning_rate": 1.038231461662558e-05, "loss": 0.7643, "step": 7550 }, { "epoch": 0.5142419862256611, "grad_norm": 0.24870260059833527, "learning_rate": 1.0359835536653073e-05, "loss": 0.7725, "step": 7560 }, { "epoch": 0.5149222004931553, "grad_norm": 0.24920696020126343, "learning_rate": 1.0337354635891919e-05, "loss": 0.7609, "step": 7570 }, { "epoch": 0.5156024147606496, "grad_norm": 0.24223774671554565, "learning_rate": 1.031487202809677e-05, "loss": 0.762, "step": 7580 }, { "epoch": 0.5162826290281438, "grad_norm": 0.24686753749847412, "learning_rate": 1.0292387827030926e-05, "loss": 0.7804, "step": 7590 }, { "epoch": 0.5169628432956381, "grad_norm": 0.2514561712741852, "learning_rate": 1.0269902146465744e-05, "loss": 0.7664, "step": 7600 }, { "epoch": 0.5176430575631323, "grad_norm": 0.24237996339797974, "learning_rate": 1.0247415100180073e-05, "loss": 0.7754, "step": 7610 }, { "epoch": 0.5183232718306267, "grad_norm": 0.2523074448108673, "learning_rate": 1.0224926801959672e-05, "loss": 0.7787, "step": 7620 }, { "epoch": 0.5190034860981209, "grad_norm": 0.25330936908721924, "learning_rate": 1.0202437365596625e-05, "loss": 0.78, "step": 7630 }, { "epoch": 0.5196837003656152, "grad_norm": 0.25370344519615173, "learning_rate": 1.0179946904888784e-05, "loss": 0.7701, "step": 7640 }, { "epoch": 0.5203639146331094, "grad_norm": 0.2527335584163666, "learning_rate": 1.0157455533639187e-05, "loss": 0.776, "step": 7650 }, { "epoch": 0.5210441289006037, "grad_norm": 0.2460501492023468, "learning_rate": 1.0134963365655473e-05, "loss": 0.7722, "step": 7660 }, { "epoch": 0.5217243431680979, "grad_norm": 0.26167675852775574, "learning_rate": 1.0112470514749311e-05, "loss": 0.7699, "step": 7670 }, { "epoch": 0.5224045574355922, "grad_norm": 0.24475277960300446, "learning_rate": 1.0089977094735833e-05, "loss": 0.7729, "step": 7680 }, { "epoch": 0.5230847717030864, "grad_norm": 0.24689598381519318, "learning_rate": 1.0067483219433045e-05, "loss": 0.7825, "step": 7690 }, { "epoch": 0.5237649859705807, "grad_norm": 0.2483471781015396, "learning_rate": 1.0044989002661258e-05, "loss": 0.7669, "step": 7700 }, { "epoch": 0.524445200238075, "grad_norm": 0.24557854235172272, "learning_rate": 1.0022494558242513e-05, "loss": 0.7779, "step": 7710 }, { "epoch": 0.5251254145055693, "grad_norm": 0.2462674230337143, "learning_rate": 1e-05, "loss": 0.7778, "step": 7720 }, { "epoch": 0.5258056287730635, "grad_norm": 0.2452927678823471, "learning_rate": 9.977505441757488e-06, "loss": 0.7751, "step": 7730 }, { "epoch": 0.5264858430405578, "grad_norm": 0.24400685727596283, "learning_rate": 9.955010997338745e-06, "loss": 0.7662, "step": 7740 }, { "epoch": 0.527166057308052, "grad_norm": 0.2558972239494324, "learning_rate": 9.932516780566959e-06, "loss": 0.7668, "step": 7750 }, { "epoch": 0.5278462715755463, "grad_norm": 0.24762921035289764, "learning_rate": 9.91002290526417e-06, "loss": 0.779, "step": 7760 }, { "epoch": 0.5285264858430405, "grad_norm": 0.26425403356552124, "learning_rate": 9.887529485250692e-06, "loss": 0.7688, "step": 7770 }, { "epoch": 0.5292067001105348, "grad_norm": 0.25996628403663635, "learning_rate": 9.86503663434453e-06, "loss": 0.765, "step": 7780 }, { "epoch": 0.529886914378029, "grad_norm": 0.24393202364444733, "learning_rate": 9.842544466360814e-06, "loss": 0.7821, "step": 7790 }, { "epoch": 0.5305671286455234, "grad_norm": 0.24665607511997223, "learning_rate": 9.820053095111218e-06, "loss": 0.7602, "step": 7800 }, { "epoch": 0.5312473429130176, "grad_norm": 0.2442709356546402, "learning_rate": 9.79756263440338e-06, "loss": 0.7492, "step": 7810 }, { "epoch": 0.5319275571805119, "grad_norm": 0.25507017970085144, "learning_rate": 9.775073198040332e-06, "loss": 0.7703, "step": 7820 }, { "epoch": 0.5326077714480061, "grad_norm": 0.24093185365200043, "learning_rate": 9.752584899819929e-06, "loss": 0.7641, "step": 7830 }, { "epoch": 0.5332879857155004, "grad_norm": 0.24089796841144562, "learning_rate": 9.730097853534258e-06, "loss": 0.75, "step": 7840 }, { "epoch": 0.5339681999829946, "grad_norm": 0.2501702606678009, "learning_rate": 9.707612172969077e-06, "loss": 0.7721, "step": 7850 }, { "epoch": 0.5346484142504889, "grad_norm": 0.24304954707622528, "learning_rate": 9.685127971903232e-06, "loss": 0.7709, "step": 7860 }, { "epoch": 0.5353286285179831, "grad_norm": 0.2563300132751465, "learning_rate": 9.662645364108086e-06, "loss": 0.778, "step": 7870 }, { "epoch": 0.5360088427854774, "grad_norm": 0.26332852244377136, "learning_rate": 9.640164463346929e-06, "loss": 0.7792, "step": 7880 }, { "epoch": 0.5366890570529717, "grad_norm": 0.2405518740415573, "learning_rate": 9.617685383374424e-06, "loss": 0.7722, "step": 7890 }, { "epoch": 0.537369271320466, "grad_norm": 0.24706117808818817, "learning_rate": 9.595208237936018e-06, "loss": 0.7503, "step": 7900 }, { "epoch": 0.5380494855879602, "grad_norm": 0.24360667169094086, "learning_rate": 9.57273314076737e-06, "loss": 0.7728, "step": 7910 }, { "epoch": 0.5387296998554545, "grad_norm": 0.2456994205713272, "learning_rate": 9.55026020559377e-06, "loss": 0.7776, "step": 7920 }, { "epoch": 0.5394099141229487, "grad_norm": 0.24596786499023438, "learning_rate": 9.52778954612957e-06, "loss": 0.7758, "step": 7930 }, { "epoch": 0.540090128390443, "grad_norm": 0.2587563991546631, "learning_rate": 9.505321276077614e-06, "loss": 0.784, "step": 7940 }, { "epoch": 0.5407703426579372, "grad_norm": 0.2651641368865967, "learning_rate": 9.482855509128644e-06, "loss": 0.7533, "step": 7950 }, { "epoch": 0.5414505569254315, "grad_norm": 0.30029022693634033, "learning_rate": 9.460392358960743e-06, "loss": 0.7892, "step": 7960 }, { "epoch": 0.5421307711929257, "grad_norm": 0.25703874230384827, "learning_rate": 9.437931939238755e-06, "loss": 0.775, "step": 7970 }, { "epoch": 0.5428109854604201, "grad_norm": 0.25282925367355347, "learning_rate": 9.415474363613701e-06, "loss": 0.7727, "step": 7980 }, { "epoch": 0.5434911997279143, "grad_norm": 0.2516460716724396, "learning_rate": 9.393019745722215e-06, "loss": 0.7732, "step": 7990 }, { "epoch": 0.5441714139954086, "grad_norm": 0.2425277680158615, "learning_rate": 9.370568199185963e-06, "loss": 0.7753, "step": 8000 }, { "epoch": 0.5448516282629028, "grad_norm": 0.24489714205265045, "learning_rate": 9.348119837611071e-06, "loss": 0.7874, "step": 8010 }, { "epoch": 0.5455318425303971, "grad_norm": 0.24812796711921692, "learning_rate": 9.32567477458755e-06, "loss": 0.7665, "step": 8020 }, { "epoch": 0.5462120567978913, "grad_norm": 2.061338424682617, "learning_rate": 9.303233123688716e-06, "loss": 0.7758, "step": 8030 }, { "epoch": 0.5468922710653856, "grad_norm": 0.24883481860160828, "learning_rate": 9.280794998470623e-06, "loss": 0.7652, "step": 8040 }, { "epoch": 0.5475724853328798, "grad_norm": 0.24857421219348907, "learning_rate": 9.258360512471484e-06, "loss": 0.7588, "step": 8050 }, { "epoch": 0.5482526996003741, "grad_norm": 0.2536051273345947, "learning_rate": 9.235929779211099e-06, "loss": 0.7665, "step": 8060 }, { "epoch": 0.5489329138678684, "grad_norm": 0.24272532761096954, "learning_rate": 9.213502912190275e-06, "loss": 0.7781, "step": 8070 }, { "epoch": 0.5496131281353627, "grad_norm": 0.24849000573158264, "learning_rate": 9.191080024890257e-06, "loss": 0.7675, "step": 8080 }, { "epoch": 0.5502933424028569, "grad_norm": 0.2545180022716522, "learning_rate": 9.168661230772157e-06, "loss": 0.7654, "step": 8090 }, { "epoch": 0.5509735566703512, "grad_norm": 0.2520526945590973, "learning_rate": 9.146246643276368e-06, "loss": 0.7686, "step": 8100 }, { "epoch": 0.5516537709378454, "grad_norm": 0.2500572204589844, "learning_rate": 9.123836375822002e-06, "loss": 0.769, "step": 8110 }, { "epoch": 0.5523339852053397, "grad_norm": 0.2473742514848709, "learning_rate": 9.101430541806308e-06, "loss": 0.7546, "step": 8120 }, { "epoch": 0.5530141994728339, "grad_norm": 0.27262288331985474, "learning_rate": 9.079029254604108e-06, "loss": 0.7804, "step": 8130 }, { "epoch": 0.5536944137403282, "grad_norm": 0.23986180126667023, "learning_rate": 9.056632627567209e-06, "loss": 0.7862, "step": 8140 }, { "epoch": 0.5543746280078224, "grad_norm": 0.2418127804994583, "learning_rate": 9.03424077402384e-06, "loss": 0.7768, "step": 8150 }, { "epoch": 0.5550548422753168, "grad_norm": 0.24957619607448578, "learning_rate": 9.01185380727808e-06, "loss": 0.7635, "step": 8160 }, { "epoch": 0.555735056542811, "grad_norm": 0.24887363612651825, "learning_rate": 8.989471840609273e-06, "loss": 0.7594, "step": 8170 }, { "epoch": 0.5564152708103053, "grad_norm": 0.24794262647628784, "learning_rate": 8.967094987271469e-06, "loss": 0.768, "step": 8180 }, { "epoch": 0.5570954850777995, "grad_norm": 0.24824637174606323, "learning_rate": 8.944723360492842e-06, "loss": 0.7641, "step": 8190 }, { "epoch": 0.5577756993452938, "grad_norm": 0.2551088333129883, "learning_rate": 8.922357073475116e-06, "loss": 0.7946, "step": 8200 }, { "epoch": 0.558455913612788, "grad_norm": 0.2511906325817108, "learning_rate": 8.899996239393002e-06, "loss": 0.7699, "step": 8210 }, { "epoch": 0.5591361278802823, "grad_norm": 0.24231868982315063, "learning_rate": 8.877640971393615e-06, "loss": 0.7673, "step": 8220 }, { "epoch": 0.5598163421477765, "grad_norm": 0.25138306617736816, "learning_rate": 8.855291382595904e-06, "loss": 0.7619, "step": 8230 }, { "epoch": 0.5604965564152709, "grad_norm": 0.2524348199367523, "learning_rate": 8.832947586090082e-06, "loss": 0.7649, "step": 8240 }, { "epoch": 0.5611767706827651, "grad_norm": 0.25453469157218933, "learning_rate": 8.810609694937053e-06, "loss": 0.7788, "step": 8250 }, { "epoch": 0.5618569849502594, "grad_norm": 0.2537672221660614, "learning_rate": 8.788277822167843e-06, "loss": 0.7643, "step": 8260 }, { "epoch": 0.5625371992177536, "grad_norm": 0.2489592581987381, "learning_rate": 8.765952080783019e-06, "loss": 0.7741, "step": 8270 }, { "epoch": 0.5632174134852479, "grad_norm": 0.24671168625354767, "learning_rate": 8.743632583752123e-06, "loss": 0.7627, "step": 8280 }, { "epoch": 0.5638976277527421, "grad_norm": 0.24434447288513184, "learning_rate": 8.721319444013108e-06, "loss": 0.7745, "step": 8290 }, { "epoch": 0.5645778420202364, "grad_norm": 0.24269433319568634, "learning_rate": 8.699012774471748e-06, "loss": 0.7675, "step": 8300 }, { "epoch": 0.5652580562877306, "grad_norm": 0.24096103012561798, "learning_rate": 8.676712688001086e-06, "loss": 0.7546, "step": 8310 }, { "epoch": 0.5659382705552249, "grad_norm": 0.2548983693122864, "learning_rate": 8.654419297440844e-06, "loss": 0.7611, "step": 8320 }, { "epoch": 0.5666184848227191, "grad_norm": 0.24676938354969025, "learning_rate": 8.63213271559688e-06, "loss": 0.7732, "step": 8330 }, { "epoch": 0.5672986990902135, "grad_norm": 0.24426358938217163, "learning_rate": 8.609853055240582e-06, "loss": 0.7803, "step": 8340 }, { "epoch": 0.5679789133577077, "grad_norm": 0.24262279272079468, "learning_rate": 8.587580429108323e-06, "loss": 0.7685, "step": 8350 }, { "epoch": 0.568659127625202, "grad_norm": 0.2493383139371872, "learning_rate": 8.56531494990088e-06, "loss": 0.7759, "step": 8360 }, { "epoch": 0.5693393418926962, "grad_norm": 0.2447572648525238, "learning_rate": 8.54305673028287e-06, "loss": 0.7655, "step": 8370 }, { "epoch": 0.5700195561601905, "grad_norm": 0.24559731781482697, "learning_rate": 8.520805882882168e-06, "loss": 0.7717, "step": 8380 }, { "epoch": 0.5706997704276847, "grad_norm": 0.23603376746177673, "learning_rate": 8.498562520289356e-06, "loss": 0.7702, "step": 8390 }, { "epoch": 0.571379984695179, "grad_norm": 0.24445731937885284, "learning_rate": 8.476326755057131e-06, "loss": 0.7674, "step": 8400 }, { "epoch": 0.5720601989626732, "grad_norm": 0.24234746396541595, "learning_rate": 8.454098699699759e-06, "loss": 0.7717, "step": 8410 }, { "epoch": 0.5727404132301676, "grad_norm": 0.2467491179704666, "learning_rate": 8.431878466692482e-06, "loss": 0.7736, "step": 8420 }, { "epoch": 0.5734206274976618, "grad_norm": 0.812633752822876, "learning_rate": 8.409666168470967e-06, "loss": 0.7578, "step": 8430 }, { "epoch": 0.5741008417651561, "grad_norm": 0.25730645656585693, "learning_rate": 8.387461917430732e-06, "loss": 0.7726, "step": 8440 }, { "epoch": 0.5747810560326503, "grad_norm": 0.24951675534248352, "learning_rate": 8.36526582592657e-06, "loss": 0.7812, "step": 8450 }, { "epoch": 0.5754612703001446, "grad_norm": 0.24625453352928162, "learning_rate": 8.343078006271989e-06, "loss": 0.764, "step": 8460 }, { "epoch": 0.5761414845676388, "grad_norm": 0.25189143419265747, "learning_rate": 8.320898570738645e-06, "loss": 0.7741, "step": 8470 }, { "epoch": 0.576821698835133, "grad_norm": 0.2517893314361572, "learning_rate": 8.298727631555761e-06, "loss": 0.772, "step": 8480 }, { "epoch": 0.5775019131026273, "grad_norm": 0.24455204606056213, "learning_rate": 8.276565300909577e-06, "loss": 0.7791, "step": 8490 }, { "epoch": 0.5781821273701215, "grad_norm": 0.24319756031036377, "learning_rate": 8.254411690942768e-06, "loss": 0.7697, "step": 8500 }, { "epoch": 0.5788623416376159, "grad_norm": 0.2484409362077713, "learning_rate": 8.232266913753883e-06, "loss": 0.7738, "step": 8510 }, { "epoch": 0.5795425559051101, "grad_norm": 0.25438666343688965, "learning_rate": 8.210131081396779e-06, "loss": 0.7666, "step": 8520 }, { "epoch": 0.5802227701726044, "grad_norm": 0.2526380717754364, "learning_rate": 8.188004305880046e-06, "loss": 0.7708, "step": 8530 }, { "epoch": 0.5809029844400986, "grad_norm": 0.2531062662601471, "learning_rate": 8.165886699166452e-06, "loss": 0.7688, "step": 8540 }, { "epoch": 0.5815831987075929, "grad_norm": 0.261444091796875, "learning_rate": 8.143778373172367e-06, "loss": 0.7732, "step": 8550 }, { "epoch": 0.5822634129750871, "grad_norm": 0.24088214337825775, "learning_rate": 8.1216794397672e-06, "loss": 0.7637, "step": 8560 }, { "epoch": 0.5829436272425814, "grad_norm": 0.2516765296459198, "learning_rate": 8.099590010772835e-06, "loss": 0.7683, "step": 8570 }, { "epoch": 0.5836238415100756, "grad_norm": 0.24979981780052185, "learning_rate": 8.077510197963061e-06, "loss": 0.7773, "step": 8580 }, { "epoch": 0.5843040557775699, "grad_norm": 0.25018417835235596, "learning_rate": 8.055440113063008e-06, "loss": 0.7824, "step": 8590 }, { "epoch": 0.5849842700450641, "grad_norm": 0.2482062429189682, "learning_rate": 8.033379867748587e-06, "loss": 0.7703, "step": 8600 }, { "epoch": 0.5856644843125585, "grad_norm": 0.24455052614212036, "learning_rate": 8.011329573645915e-06, "loss": 0.7753, "step": 8610 }, { "epoch": 0.5863446985800527, "grad_norm": 0.24979741871356964, "learning_rate": 7.989289342330757e-06, "loss": 0.7749, "step": 8620 }, { "epoch": 0.587024912847547, "grad_norm": 0.2415253072977066, "learning_rate": 7.96725928532796e-06, "loss": 0.7614, "step": 8630 }, { "epoch": 0.5877051271150412, "grad_norm": 0.24844114482402802, "learning_rate": 7.945239514110886e-06, "loss": 0.7707, "step": 8640 }, { "epoch": 0.5883853413825355, "grad_norm": 0.2513725459575653, "learning_rate": 7.923230140100855e-06, "loss": 0.7574, "step": 8650 }, { "epoch": 0.5890655556500297, "grad_norm": 0.2472066730260849, "learning_rate": 7.901231274666574e-06, "loss": 0.7729, "step": 8660 }, { "epoch": 0.589745769917524, "grad_norm": 0.2509872019290924, "learning_rate": 7.879243029123573e-06, "loss": 0.7644, "step": 8670 }, { "epoch": 0.5904259841850182, "grad_norm": 0.24720162153244019, "learning_rate": 7.857265514733651e-06, "loss": 0.7644, "step": 8680 }, { "epoch": 0.5911061984525126, "grad_norm": 0.2582946717739105, "learning_rate": 7.835298842704303e-06, "loss": 0.7824, "step": 8690 }, { "epoch": 0.5917864127200068, "grad_norm": 0.25611555576324463, "learning_rate": 7.81334312418816e-06, "loss": 0.7704, "step": 8700 }, { "epoch": 0.5924666269875011, "grad_norm": 0.24672120809555054, "learning_rate": 7.79139847028243e-06, "loss": 0.7535, "step": 8710 }, { "epoch": 0.5931468412549953, "grad_norm": 0.25224679708480835, "learning_rate": 7.769464992028332e-06, "loss": 0.7689, "step": 8720 }, { "epoch": 0.5938270555224896, "grad_norm": 0.2413455694913864, "learning_rate": 7.747542800410538e-06, "loss": 0.7705, "step": 8730 }, { "epoch": 0.5945072697899838, "grad_norm": 0.24807719886302948, "learning_rate": 7.725632006356603e-06, "loss": 0.7599, "step": 8740 }, { "epoch": 0.5951874840574781, "grad_norm": 0.2648003399372101, "learning_rate": 7.70373272073642e-06, "loss": 0.7611, "step": 8750 }, { "epoch": 0.5958676983249723, "grad_norm": 0.25700241327285767, "learning_rate": 7.681845054361634e-06, "loss": 0.7728, "step": 8760 }, { "epoch": 0.5965479125924666, "grad_norm": 0.25815171003341675, "learning_rate": 7.659969117985111e-06, "loss": 0.7685, "step": 8770 }, { "epoch": 0.5972281268599609, "grad_norm": 0.25158023834228516, "learning_rate": 7.638105022300352e-06, "loss": 0.7718, "step": 8780 }, { "epoch": 0.5979083411274552, "grad_norm": 0.24402719736099243, "learning_rate": 7.6162528779409506e-06, "loss": 0.7623, "step": 8790 }, { "epoch": 0.5985885553949494, "grad_norm": 0.24518702924251556, "learning_rate": 7.59441279548002e-06, "loss": 0.7688, "step": 8800 }, { "epoch": 0.5992687696624437, "grad_norm": 0.2488635927438736, "learning_rate": 7.5725848854296425e-06, "loss": 0.7697, "step": 8810 }, { "epoch": 0.5999489839299379, "grad_norm": 0.24867048859596252, "learning_rate": 7.550769258240307e-06, "loss": 0.7808, "step": 8820 }, { "epoch": 0.6006291981974322, "grad_norm": 0.2935914099216461, "learning_rate": 7.528966024300352e-06, "loss": 0.7627, "step": 8830 }, { "epoch": 0.6013094124649264, "grad_norm": 0.2575988173484802, "learning_rate": 7.507175293935401e-06, "loss": 0.7762, "step": 8840 }, { "epoch": 0.6019896267324207, "grad_norm": 0.24520112574100494, "learning_rate": 7.485397177407814e-06, "loss": 0.763, "step": 8850 }, { "epoch": 0.6026698409999149, "grad_norm": 0.2443533092737198, "learning_rate": 7.463631784916121e-06, "loss": 0.759, "step": 8860 }, { "epoch": 0.6033500552674093, "grad_norm": 0.24732626974582672, "learning_rate": 7.4418792265944686e-06, "loss": 0.7703, "step": 8870 }, { "epoch": 0.6040302695349035, "grad_norm": 0.24233926832675934, "learning_rate": 7.420139612512061e-06, "loss": 0.7449, "step": 8880 }, { "epoch": 0.6047104838023978, "grad_norm": 0.24057942628860474, "learning_rate": 7.398413052672604e-06, "loss": 0.7706, "step": 8890 }, { "epoch": 0.605390698069892, "grad_norm": 0.24792107939720154, "learning_rate": 7.376699657013747e-06, "loss": 0.7723, "step": 8900 }, { "epoch": 0.6060709123373863, "grad_norm": 0.24438060820102692, "learning_rate": 7.354999535406533e-06, "loss": 0.7583, "step": 8910 }, { "epoch": 0.6067511266048805, "grad_norm": 0.24966730177402496, "learning_rate": 7.3333127976548305e-06, "loss": 0.7619, "step": 8920 }, { "epoch": 0.6074313408723748, "grad_norm": 0.25231412053108215, "learning_rate": 7.31163955349479e-06, "loss": 0.7621, "step": 8930 }, { "epoch": 0.608111555139869, "grad_norm": 0.24898163974285126, "learning_rate": 7.289979912594283e-06, "loss": 0.7573, "step": 8940 }, { "epoch": 0.6087917694073633, "grad_norm": 0.248761385679245, "learning_rate": 7.268333984552345e-06, "loss": 0.7686, "step": 8950 }, { "epoch": 0.6094719836748576, "grad_norm": 0.24428556859493256, "learning_rate": 7.2467018788986256e-06, "loss": 0.7784, "step": 8960 }, { "epoch": 0.6101521979423519, "grad_norm": 0.2532806694507599, "learning_rate": 7.2250837050928324e-06, "loss": 0.7757, "step": 8970 }, { "epoch": 0.6108324122098461, "grad_norm": 0.2456384152173996, "learning_rate": 7.203479572524177e-06, "loss": 0.7804, "step": 8980 }, { "epoch": 0.6115126264773404, "grad_norm": 0.252175897359848, "learning_rate": 7.18188959051082e-06, "loss": 0.7613, "step": 8990 }, { "epoch": 0.6121928407448346, "grad_norm": 0.24484005570411682, "learning_rate": 7.160313868299321e-06, "loss": 0.753, "step": 9000 }, { "epoch": 0.6128730550123289, "grad_norm": 0.2501670718193054, "learning_rate": 7.138752515064085e-06, "loss": 0.7759, "step": 9010 }, { "epoch": 0.6135532692798231, "grad_norm": 0.24075950682163239, "learning_rate": 7.117205639906806e-06, "loss": 0.7577, "step": 9020 }, { "epoch": 0.6142334835473174, "grad_norm": 0.26213377714157104, "learning_rate": 7.095673351855923e-06, "loss": 0.7673, "step": 9030 }, { "epoch": 0.6149136978148116, "grad_norm": 0.23907162249088287, "learning_rate": 7.074155759866057e-06, "loss": 0.7665, "step": 9040 }, { "epoch": 0.615593912082306, "grad_norm": 0.25060412287712097, "learning_rate": 7.052652972817471e-06, "loss": 0.7637, "step": 9050 }, { "epoch": 0.6162741263498002, "grad_norm": 0.24636393785476685, "learning_rate": 7.031165099515514e-06, "loss": 0.7672, "step": 9060 }, { "epoch": 0.6169543406172945, "grad_norm": 0.25181058049201965, "learning_rate": 7.009692248690066e-06, "loss": 0.7782, "step": 9070 }, { "epoch": 0.6176345548847887, "grad_norm": 0.2514166235923767, "learning_rate": 6.988234528994997e-06, "loss": 0.7606, "step": 9080 }, { "epoch": 0.618314769152283, "grad_norm": 0.25581368803977966, "learning_rate": 6.966792049007613e-06, "loss": 0.7834, "step": 9090 }, { "epoch": 0.6189949834197772, "grad_norm": 0.24559302628040314, "learning_rate": 6.945364917228101e-06, "loss": 0.7753, "step": 9100 }, { "epoch": 0.6196751976872715, "grad_norm": 0.24848997592926025, "learning_rate": 6.923953242078992e-06, "loss": 0.7602, "step": 9110 }, { "epoch": 0.6203554119547657, "grad_norm": 0.2535330355167389, "learning_rate": 6.902557131904602e-06, "loss": 0.7782, "step": 9120 }, { "epoch": 0.62103562622226, "grad_norm": 0.248373344540596, "learning_rate": 6.881176694970483e-06, "loss": 0.7726, "step": 9130 }, { "epoch": 0.6217158404897543, "grad_norm": 0.24924324452877045, "learning_rate": 6.8598120394628875e-06, "loss": 0.7712, "step": 9140 }, { "epoch": 0.6223960547572486, "grad_norm": 0.24666650593280792, "learning_rate": 6.83846327348821e-06, "loss": 0.7541, "step": 9150 }, { "epoch": 0.6230762690247428, "grad_norm": 0.23946702480316162, "learning_rate": 6.817130505072442e-06, "loss": 0.7625, "step": 9160 }, { "epoch": 0.6237564832922371, "grad_norm": 0.24774959683418274, "learning_rate": 6.795813842160626e-06, "loss": 0.7658, "step": 9170 }, { "epoch": 0.6244366975597313, "grad_norm": 0.25880733132362366, "learning_rate": 6.774513392616311e-06, "loss": 0.7574, "step": 9180 }, { "epoch": 0.6251169118272256, "grad_norm": 0.24910889565944672, "learning_rate": 6.753229264221005e-06, "loss": 0.7717, "step": 9190 }, { "epoch": 0.6257971260947198, "grad_norm": 0.251426637172699, "learning_rate": 6.73196156467363e-06, "loss": 0.7683, "step": 9200 }, { "epoch": 0.6264773403622141, "grad_norm": 0.24830366671085358, "learning_rate": 6.7107104015899795e-06, "loss": 0.7615, "step": 9210 }, { "epoch": 0.6271575546297083, "grad_norm": 0.24772919714450836, "learning_rate": 6.689475882502167e-06, "loss": 0.7758, "step": 9220 }, { "epoch": 0.6278377688972027, "grad_norm": 0.2580617368221283, "learning_rate": 6.668258114858088e-06, "loss": 0.7655, "step": 9230 }, { "epoch": 0.6285179831646969, "grad_norm": 0.24163033068180084, "learning_rate": 6.64705720602088e-06, "loss": 0.7665, "step": 9240 }, { "epoch": 0.6291981974321912, "grad_norm": 0.25090691447257996, "learning_rate": 6.625873263268363e-06, "loss": 0.7555, "step": 9250 }, { "epoch": 0.6298784116996854, "grad_norm": 0.2470577210187912, "learning_rate": 6.604706393792517e-06, "loss": 0.7617, "step": 9260 }, { "epoch": 0.6305586259671797, "grad_norm": 0.2557064890861511, "learning_rate": 6.583556704698928e-06, "loss": 0.7596, "step": 9270 }, { "epoch": 0.6312388402346739, "grad_norm": 0.24683815240859985, "learning_rate": 6.5624243030062475e-06, "loss": 0.7646, "step": 9280 }, { "epoch": 0.6319190545021682, "grad_norm": 0.26031729578971863, "learning_rate": 6.54130929564565e-06, "loss": 0.7651, "step": 9290 }, { "epoch": 0.6325992687696624, "grad_norm": 0.25592610239982605, "learning_rate": 6.520211789460298e-06, "loss": 0.7561, "step": 9300 }, { "epoch": 0.6332794830371568, "grad_norm": 0.24974516034126282, "learning_rate": 6.499131891204792e-06, "loss": 0.7637, "step": 9310 }, { "epoch": 0.633959697304651, "grad_norm": 0.25895562767982483, "learning_rate": 6.478069707544639e-06, "loss": 0.7581, "step": 9320 }, { "epoch": 0.6346399115721453, "grad_norm": 0.25263655185699463, "learning_rate": 6.457025345055708e-06, "loss": 0.769, "step": 9330 }, { "epoch": 0.6353201258396395, "grad_norm": 0.24764102697372437, "learning_rate": 6.435998910223691e-06, "loss": 0.7653, "step": 9340 }, { "epoch": 0.6360003401071338, "grad_norm": 0.25428757071495056, "learning_rate": 6.414990509443566e-06, "loss": 0.7776, "step": 9350 }, { "epoch": 0.636680554374628, "grad_norm": 0.24425633251667023, "learning_rate": 6.394000249019059e-06, "loss": 0.7642, "step": 9360 }, { "epoch": 0.6373607686421223, "grad_norm": 0.24660134315490723, "learning_rate": 6.373028235162102e-06, "loss": 0.7751, "step": 9370 }, { "epoch": 0.6380409829096165, "grad_norm": 0.253810316324234, "learning_rate": 6.3520745739923e-06, "loss": 0.767, "step": 9380 }, { "epoch": 0.6387211971771108, "grad_norm": 0.24733662605285645, "learning_rate": 6.33113937153639e-06, "loss": 0.7664, "step": 9390 }, { "epoch": 0.639401411444605, "grad_norm": 0.24834582209587097, "learning_rate": 6.310222733727711e-06, "loss": 0.7662, "step": 9400 }, { "epoch": 0.6400816257120993, "grad_norm": 0.2556808888912201, "learning_rate": 6.28932476640566e-06, "loss": 0.7766, "step": 9410 }, { "epoch": 0.6407618399795936, "grad_norm": 0.2530834376811981, "learning_rate": 6.268445575315164e-06, "loss": 0.7675, "step": 9420 }, { "epoch": 0.6414420542470878, "grad_norm": 0.25645995140075684, "learning_rate": 6.2475852661061364e-06, "loss": 0.7681, "step": 9430 }, { "epoch": 0.6421222685145821, "grad_norm": 0.24543169140815735, "learning_rate": 6.226743944332954e-06, "loss": 0.771, "step": 9440 }, { "epoch": 0.6428024827820763, "grad_norm": 0.25770193338394165, "learning_rate": 6.205921715453904e-06, "loss": 0.7671, "step": 9450 }, { "epoch": 0.6434826970495706, "grad_norm": 0.2512899935245514, "learning_rate": 6.1851186848306775e-06, "loss": 0.7561, "step": 9460 }, { "epoch": 0.6441629113170648, "grad_norm": 0.2442668378353119, "learning_rate": 6.16433495772781e-06, "loss": 0.7712, "step": 9470 }, { "epoch": 0.6448431255845591, "grad_norm": 0.24152372777462006, "learning_rate": 6.143570639312166e-06, "loss": 0.7517, "step": 9480 }, { "epoch": 0.6455233398520533, "grad_norm": 0.24572031199932098, "learning_rate": 6.122825834652399e-06, "loss": 0.7654, "step": 9490 }, { "epoch": 0.6462035541195477, "grad_norm": 0.24191130697727203, "learning_rate": 6.102100648718416e-06, "loss": 0.7525, "step": 9500 }, { "epoch": 0.6468837683870419, "grad_norm": 0.25088223814964294, "learning_rate": 6.081395186380868e-06, "loss": 0.7699, "step": 9510 }, { "epoch": 0.6475639826545362, "grad_norm": 0.24233394861221313, "learning_rate": 6.060709552410591e-06, "loss": 0.7503, "step": 9520 }, { "epoch": 0.6482441969220304, "grad_norm": 0.2550160586833954, "learning_rate": 6.0400438514780854e-06, "loss": 0.7496, "step": 9530 }, { "epoch": 0.6489244111895247, "grad_norm": 0.251807302236557, "learning_rate": 6.019398188153e-06, "loss": 0.7519, "step": 9540 }, { "epoch": 0.6496046254570189, "grad_norm": 0.24086961150169373, "learning_rate": 5.998772666903583e-06, "loss": 0.754, "step": 9550 }, { "epoch": 0.6502848397245132, "grad_norm": 0.24868667125701904, "learning_rate": 5.978167392096168e-06, "loss": 0.7541, "step": 9560 }, { "epoch": 0.6509650539920074, "grad_norm": 0.24697424471378326, "learning_rate": 5.957582467994641e-06, "loss": 0.7684, "step": 9570 }, { "epoch": 0.6516452682595018, "grad_norm": 0.2440696656703949, "learning_rate": 5.937017998759904e-06, "loss": 0.7555, "step": 9580 }, { "epoch": 0.652325482526996, "grad_norm": 0.24430793523788452, "learning_rate": 5.916474088449366e-06, "loss": 0.7649, "step": 9590 }, { "epoch": 0.6530056967944903, "grad_norm": 0.24338242411613464, "learning_rate": 5.895950841016404e-06, "loss": 0.769, "step": 9600 }, { "epoch": 0.6536859110619845, "grad_norm": 0.24120405316352844, "learning_rate": 5.875448360309837e-06, "loss": 0.7618, "step": 9610 }, { "epoch": 0.6543661253294788, "grad_norm": 0.24697110056877136, "learning_rate": 5.854966750073403e-06, "loss": 0.7619, "step": 9620 }, { "epoch": 0.655046339596973, "grad_norm": 0.24529220163822174, "learning_rate": 5.834506113945237e-06, "loss": 0.7493, "step": 9630 }, { "epoch": 0.6557265538644673, "grad_norm": 0.2561686336994171, "learning_rate": 5.814066555457343e-06, "loss": 0.7613, "step": 9640 }, { "epoch": 0.6564067681319615, "grad_norm": 0.2434144914150238, "learning_rate": 5.7936481780350705e-06, "loss": 0.7611, "step": 9650 }, { "epoch": 0.6570869823994558, "grad_norm": 0.25305524468421936, "learning_rate": 5.773251084996591e-06, "loss": 0.7662, "step": 9660 }, { "epoch": 0.65776719666695, "grad_norm": 0.24415530264377594, "learning_rate": 5.752875379552378e-06, "loss": 0.7572, "step": 9670 }, { "epoch": 0.6584474109344444, "grad_norm": 0.25350406765937805, "learning_rate": 5.7325211648046795e-06, "loss": 0.7541, "step": 9680 }, { "epoch": 0.6591276252019386, "grad_norm": 0.24665938317775726, "learning_rate": 5.7121885437470015e-06, "loss": 0.753, "step": 9690 }, { "epoch": 0.6598078394694329, "grad_norm": 0.24728219211101532, "learning_rate": 5.691877619263581e-06, "loss": 0.7705, "step": 9700 }, { "epoch": 0.6604880537369271, "grad_norm": 0.24584805965423584, "learning_rate": 5.671588494128877e-06, "loss": 0.7659, "step": 9710 }, { "epoch": 0.6611682680044214, "grad_norm": 0.24218979477882385, "learning_rate": 5.651321271007034e-06, "loss": 0.7612, "step": 9720 }, { "epoch": 0.6618484822719156, "grad_norm": 0.25144141912460327, "learning_rate": 5.631076052451376e-06, "loss": 0.7681, "step": 9730 }, { "epoch": 0.6625286965394099, "grad_norm": 0.24840059876441956, "learning_rate": 5.610852940903881e-06, "loss": 0.7675, "step": 9740 }, { "epoch": 0.6632089108069041, "grad_norm": 0.24385057389736176, "learning_rate": 5.590652038694664e-06, "loss": 0.7585, "step": 9750 }, { "epoch": 0.6638891250743985, "grad_norm": 0.24427461624145508, "learning_rate": 5.57047344804146e-06, "loss": 0.7637, "step": 9760 }, { "epoch": 0.6645693393418927, "grad_norm": 0.2447434812784195, "learning_rate": 5.5503172710491064e-06, "loss": 0.7679, "step": 9770 }, { "epoch": 0.665249553609387, "grad_norm": 0.24962159991264343, "learning_rate": 5.530183609709025e-06, "loss": 0.7632, "step": 9780 }, { "epoch": 0.6659297678768812, "grad_norm": 0.2556682229042053, "learning_rate": 5.510072565898711e-06, "loss": 0.78, "step": 9790 }, { "epoch": 0.6666099821443755, "grad_norm": 0.2510310113430023, "learning_rate": 5.489984241381208e-06, "loss": 0.7674, "step": 9800 }, { "epoch": 0.6672901964118697, "grad_norm": 0.24962040781974792, "learning_rate": 5.469918737804601e-06, "loss": 0.7652, "step": 9810 }, { "epoch": 0.667970410679364, "grad_norm": 0.2444206029176712, "learning_rate": 5.449876156701501e-06, "loss": 0.7684, "step": 9820 }, { "epoch": 0.6686506249468582, "grad_norm": 0.26629745960235596, "learning_rate": 5.42985659948853e-06, "loss": 0.7695, "step": 9830 }, { "epoch": 0.6693308392143525, "grad_norm": 0.2458677440881729, "learning_rate": 5.409860167465806e-06, "loss": 0.7602, "step": 9840 }, { "epoch": 0.6700110534818468, "grad_norm": 0.24040453135967255, "learning_rate": 5.3898869618164325e-06, "loss": 0.7624, "step": 9850 }, { "epoch": 0.6706912677493411, "grad_norm": 0.23869656026363373, "learning_rate": 5.369937083605986e-06, "loss": 0.767, "step": 9860 }, { "epoch": 0.6713714820168353, "grad_norm": 0.24998657405376434, "learning_rate": 5.350010633782007e-06, "loss": 0.7638, "step": 9870 }, { "epoch": 0.6720516962843296, "grad_norm": 0.2390597015619278, "learning_rate": 5.3301077131734846e-06, "loss": 0.7647, "step": 9880 }, { "epoch": 0.6727319105518238, "grad_norm": 0.26572591066360474, "learning_rate": 5.310228422490349e-06, "loss": 0.7541, "step": 9890 }, { "epoch": 0.6734121248193181, "grad_norm": 0.24828065931797028, "learning_rate": 5.290372862322964e-06, "loss": 0.766, "step": 9900 }, { "epoch": 0.6740923390868123, "grad_norm": 0.24568676948547363, "learning_rate": 5.270541133141611e-06, "loss": 0.7575, "step": 9910 }, { "epoch": 0.6747725533543066, "grad_norm": 0.2494325488805771, "learning_rate": 5.250733335295992e-06, "loss": 0.774, "step": 9920 }, { "epoch": 0.6754527676218008, "grad_norm": 0.24998196959495544, "learning_rate": 5.230949569014707e-06, "loss": 0.7637, "step": 9930 }, { "epoch": 0.6761329818892952, "grad_norm": 0.24165105819702148, "learning_rate": 5.211189934404763e-06, "loss": 0.7714, "step": 9940 }, { "epoch": 0.6768131961567894, "grad_norm": 0.24361291527748108, "learning_rate": 5.1914545314510515e-06, "loss": 0.7698, "step": 9950 }, { "epoch": 0.6774934104242837, "grad_norm": 0.25533944368362427, "learning_rate": 5.171743460015857e-06, "loss": 0.7594, "step": 9960 }, { "epoch": 0.6781736246917779, "grad_norm": 0.24661685526371002, "learning_rate": 5.15205681983834e-06, "loss": 0.7473, "step": 9970 }, { "epoch": 0.6788538389592722, "grad_norm": 0.25577622652053833, "learning_rate": 5.132394710534041e-06, "loss": 0.7585, "step": 9980 }, { "epoch": 0.6795340532267664, "grad_norm": 0.256303995847702, "learning_rate": 5.112757231594374e-06, "loss": 0.7674, "step": 9990 }, { "epoch": 0.6802142674942607, "grad_norm": 0.24000784754753113, "learning_rate": 5.0931444823861155e-06, "loss": 0.7725, "step": 10000 }, { "epoch": 0.6808944817617549, "grad_norm": 0.23835529386997223, "learning_rate": 5.073556562150914e-06, "loss": 0.766, "step": 10010 }, { "epoch": 0.6815746960292492, "grad_norm": 0.2461031973361969, "learning_rate": 5.05399357000478e-06, "loss": 0.7606, "step": 10020 }, { "epoch": 0.6822549102967435, "grad_norm": 0.2479454129934311, "learning_rate": 5.034455604937587e-06, "loss": 0.7648, "step": 10030 }, { "epoch": 0.6829351245642378, "grad_norm": 0.2555757761001587, "learning_rate": 5.014942765812574e-06, "loss": 0.7726, "step": 10040 }, { "epoch": 0.683615338831732, "grad_norm": 0.25147545337677, "learning_rate": 4.995455151365828e-06, "loss": 0.7566, "step": 10050 }, { "epoch": 0.6842955530992263, "grad_norm": 0.23948884010314941, "learning_rate": 4.975992860205811e-06, "loss": 0.7648, "step": 10060 }, { "epoch": 0.6849757673667205, "grad_norm": 0.2528388500213623, "learning_rate": 4.956555990812842e-06, "loss": 0.7524, "step": 10070 }, { "epoch": 0.6856559816342148, "grad_norm": 0.25195372104644775, "learning_rate": 4.937144641538607e-06, "loss": 0.7588, "step": 10080 }, { "epoch": 0.686336195901709, "grad_norm": 0.2495216578245163, "learning_rate": 4.917758910605658e-06, "loss": 0.7605, "step": 10090 }, { "epoch": 0.6870164101692033, "grad_norm": 0.24449487030506134, "learning_rate": 4.898398896106914e-06, "loss": 0.7571, "step": 10100 }, { "epoch": 0.6876966244366975, "grad_norm": 0.24723927676677704, "learning_rate": 4.879064696005171e-06, "loss": 0.7487, "step": 10110 }, { "epoch": 0.6883768387041919, "grad_norm": 0.24362149834632874, "learning_rate": 4.859756408132601e-06, "loss": 0.7623, "step": 10120 }, { "epoch": 0.6890570529716861, "grad_norm": 0.24735696613788605, "learning_rate": 4.840474130190258e-06, "loss": 0.7534, "step": 10130 }, { "epoch": 0.6897372672391804, "grad_norm": 0.24546845257282257, "learning_rate": 4.821217959747585e-06, "loss": 0.7657, "step": 10140 }, { "epoch": 0.6904174815066746, "grad_norm": 0.2601371109485626, "learning_rate": 4.801987994241921e-06, "loss": 0.7578, "step": 10150 }, { "epoch": 0.6910976957741689, "grad_norm": 0.2417573481798172, "learning_rate": 4.782784330978003e-06, "loss": 0.7666, "step": 10160 }, { "epoch": 0.6917779100416631, "grad_norm": 0.23398882150650024, "learning_rate": 4.763607067127479e-06, "loss": 0.7694, "step": 10170 }, { "epoch": 0.6924581243091574, "grad_norm": 0.24017900228500366, "learning_rate": 4.744456299728417e-06, "loss": 0.7645, "step": 10180 }, { "epoch": 0.6931383385766516, "grad_norm": 0.2445467710494995, "learning_rate": 4.725332125684807e-06, "loss": 0.7655, "step": 10190 }, { "epoch": 0.693818552844146, "grad_norm": 0.25575804710388184, "learning_rate": 4.7062346417660775e-06, "loss": 0.7612, "step": 10200 }, { "epoch": 0.6944987671116402, "grad_norm": 0.24726071953773499, "learning_rate": 4.687163944606603e-06, "loss": 0.7607, "step": 10210 }, { "epoch": 0.6951789813791345, "grad_norm": 0.24544966220855713, "learning_rate": 4.6681201307052144e-06, "loss": 0.7523, "step": 10220 }, { "epoch": 0.6958591956466287, "grad_norm": 0.24802793562412262, "learning_rate": 4.649103296424716e-06, "loss": 0.77, "step": 10230 }, { "epoch": 0.696539409914123, "grad_norm": 0.24389968812465668, "learning_rate": 4.630113537991388e-06, "loss": 0.7582, "step": 10240 }, { "epoch": 0.6972196241816172, "grad_norm": 0.24656598269939423, "learning_rate": 4.611150951494506e-06, "loss": 0.7747, "step": 10250 }, { "epoch": 0.6978998384491115, "grad_norm": 0.24470894038677216, "learning_rate": 4.59221563288586e-06, "loss": 0.7651, "step": 10260 }, { "epoch": 0.6985800527166057, "grad_norm": 0.2406274378299713, "learning_rate": 4.573307677979255e-06, "loss": 0.7576, "step": 10270 }, { "epoch": 0.6992602669841, "grad_norm": 0.24292300641536713, "learning_rate": 4.554427182450039e-06, "loss": 0.7615, "step": 10280 }, { "epoch": 0.6999404812515942, "grad_norm": 0.2497672587633133, "learning_rate": 4.535574241834615e-06, "loss": 0.7625, "step": 10290 }, { "epoch": 0.7006206955190886, "grad_norm": 0.24484290182590485, "learning_rate": 4.51674895152995e-06, "loss": 0.758, "step": 10300 }, { "epoch": 0.7013009097865828, "grad_norm": 0.25799936056137085, "learning_rate": 4.497951406793105e-06, "loss": 0.7546, "step": 10310 }, { "epoch": 0.7019811240540771, "grad_norm": 0.24973861873149872, "learning_rate": 4.479181702740746e-06, "loss": 0.7709, "step": 10320 }, { "epoch": 0.7026613383215713, "grad_norm": 0.24379494786262512, "learning_rate": 4.460439934348661e-06, "loss": 0.7653, "step": 10330 }, { "epoch": 0.7033415525890656, "grad_norm": 0.24392396211624146, "learning_rate": 4.441726196451284e-06, "loss": 0.7573, "step": 10340 }, { "epoch": 0.7040217668565598, "grad_norm": 0.26352494955062866, "learning_rate": 4.423040583741211e-06, "loss": 0.7665, "step": 10350 }, { "epoch": 0.704701981124054, "grad_norm": 0.2395162284374237, "learning_rate": 4.4043831907687255e-06, "loss": 0.7475, "step": 10360 }, { "epoch": 0.7053821953915483, "grad_norm": 0.2522451877593994, "learning_rate": 4.385754111941316e-06, "loss": 0.7573, "step": 10370 }, { "epoch": 0.7060624096590425, "grad_norm": 0.2469368726015091, "learning_rate": 4.367153441523202e-06, "loss": 0.7517, "step": 10380 }, { "epoch": 0.7067426239265369, "grad_norm": 0.24479222297668457, "learning_rate": 4.34858127363485e-06, "loss": 0.7667, "step": 10390 }, { "epoch": 0.7074228381940311, "grad_norm": 0.24857044219970703, "learning_rate": 4.330037702252505e-06, "loss": 0.7595, "step": 10400 }, { "epoch": 0.7081030524615254, "grad_norm": 0.2452039271593094, "learning_rate": 4.311522821207715e-06, "loss": 0.7381, "step": 10410 }, { "epoch": 0.7087832667290196, "grad_norm": 0.24671368300914764, "learning_rate": 4.293036724186848e-06, "loss": 0.76, "step": 10420 }, { "epoch": 0.7094634809965139, "grad_norm": 0.25648754835128784, "learning_rate": 4.274579504730626e-06, "loss": 0.765, "step": 10430 }, { "epoch": 0.7101436952640081, "grad_norm": 0.2619253993034363, "learning_rate": 4.2561512562336475e-06, "loss": 0.7707, "step": 10440 }, { "epoch": 0.7108239095315024, "grad_norm": 0.2492845207452774, "learning_rate": 4.237752071943917e-06, "loss": 0.7582, "step": 10450 }, { "epoch": 0.7115041237989966, "grad_norm": 0.2471589595079422, "learning_rate": 4.219382044962374e-06, "loss": 0.7616, "step": 10460 }, { "epoch": 0.712184338066491, "grad_norm": 0.25441455841064453, "learning_rate": 4.201041268242418e-06, "loss": 0.7698, "step": 10470 }, { "epoch": 0.7128645523339852, "grad_norm": 0.25763577222824097, "learning_rate": 4.1827298345894394e-06, "loss": 0.7462, "step": 10480 }, { "epoch": 0.7135447666014795, "grad_norm": 0.24413402378559113, "learning_rate": 4.164447836660354e-06, "loss": 0.7664, "step": 10490 }, { "epoch": 0.7142249808689737, "grad_norm": 0.24375353753566742, "learning_rate": 4.146195366963133e-06, "loss": 0.7617, "step": 10500 }, { "epoch": 0.714905195136468, "grad_norm": 0.25362712144851685, "learning_rate": 4.127972517856326e-06, "loss": 0.7672, "step": 10510 }, { "epoch": 0.7155854094039622, "grad_norm": 0.25445815920829773, "learning_rate": 4.109779381548605e-06, "loss": 0.7639, "step": 10520 }, { "epoch": 0.7162656236714565, "grad_norm": 0.24999208748340607, "learning_rate": 4.091616050098296e-06, "loss": 0.7601, "step": 10530 }, { "epoch": 0.7169458379389507, "grad_norm": 0.2546180784702301, "learning_rate": 4.073482615412905e-06, "loss": 0.7778, "step": 10540 }, { "epoch": 0.717626052206445, "grad_norm": 0.24735789000988007, "learning_rate": 4.055379169248663e-06, "loss": 0.7719, "step": 10550 }, { "epoch": 0.7183062664739392, "grad_norm": 0.2446472942829132, "learning_rate": 4.037305803210057e-06, "loss": 0.7729, "step": 10560 }, { "epoch": 0.7189864807414336, "grad_norm": 0.2434018850326538, "learning_rate": 4.019262608749362e-06, "loss": 0.765, "step": 10570 }, { "epoch": 0.7196666950089278, "grad_norm": 0.24566373229026794, "learning_rate": 4.00124967716619e-06, "loss": 0.7629, "step": 10580 }, { "epoch": 0.7203469092764221, "grad_norm": 0.2555360496044159, "learning_rate": 3.9832670996070175e-06, "loss": 0.7533, "step": 10590 }, { "epoch": 0.7210271235439163, "grad_norm": 0.25441837310791016, "learning_rate": 3.9653149670647274e-06, "loss": 0.7723, "step": 10600 }, { "epoch": 0.7217073378114106, "grad_norm": 0.24170216917991638, "learning_rate": 3.947393370378151e-06, "loss": 0.7526, "step": 10610 }, { "epoch": 0.7223875520789048, "grad_norm": 0.24596717953681946, "learning_rate": 3.929502400231603e-06, "loss": 0.7587, "step": 10620 }, { "epoch": 0.7230677663463991, "grad_norm": 0.24249835312366486, "learning_rate": 3.911642147154431e-06, "loss": 0.7504, "step": 10630 }, { "epoch": 0.7237479806138933, "grad_norm": 0.23611494898796082, "learning_rate": 3.8938127015205484e-06, "loss": 0.7547, "step": 10640 }, { "epoch": 0.7244281948813877, "grad_norm": 0.24255642294883728, "learning_rate": 3.8760141535479835e-06, "loss": 0.7564, "step": 10650 }, { "epoch": 0.7251084091488819, "grad_norm": 0.24236130714416504, "learning_rate": 3.858246593298418e-06, "loss": 0.7567, "step": 10660 }, { "epoch": 0.7257886234163762, "grad_norm": 0.25300332903862, "learning_rate": 3.840510110676737e-06, "loss": 0.769, "step": 10670 }, { "epoch": 0.7264688376838704, "grad_norm": 0.24559025466442108, "learning_rate": 3.822804795430567e-06, "loss": 0.7617, "step": 10680 }, { "epoch": 0.7271490519513647, "grad_norm": 0.2505420446395874, "learning_rate": 3.8051307371498323e-06, "loss": 0.7678, "step": 10690 }, { "epoch": 0.7278292662188589, "grad_norm": 0.25066983699798584, "learning_rate": 3.7874880252662905e-06, "loss": 0.7725, "step": 10700 }, { "epoch": 0.7285094804863532, "grad_norm": 0.25248897075653076, "learning_rate": 3.769876749053084e-06, "loss": 0.7728, "step": 10710 }, { "epoch": 0.7291896947538474, "grad_norm": 0.23779115080833435, "learning_rate": 3.7522969976242917e-06, "loss": 0.7559, "step": 10720 }, { "epoch": 0.7298699090213417, "grad_norm": 0.248768612742424, "learning_rate": 3.7347488599344716e-06, "loss": 0.7719, "step": 10730 }, { "epoch": 0.730550123288836, "grad_norm": 0.245006263256073, "learning_rate": 3.7172324247782188e-06, "loss": 0.7632, "step": 10740 }, { "epoch": 0.7312303375563303, "grad_norm": 0.23913902044296265, "learning_rate": 3.6997477807897075e-06, "loss": 0.7488, "step": 10750 }, { "epoch": 0.7319105518238245, "grad_norm": 0.24731720983982086, "learning_rate": 3.682295016442251e-06, "loss": 0.7764, "step": 10760 }, { "epoch": 0.7325907660913188, "grad_norm": 0.23971058428287506, "learning_rate": 3.664874220047845e-06, "loss": 0.7626, "step": 10770 }, { "epoch": 0.733270980358813, "grad_norm": 0.24796296656131744, "learning_rate": 3.6474854797567305e-06, "loss": 0.7585, "step": 10780 }, { "epoch": 0.7339511946263073, "grad_norm": 0.24577690660953522, "learning_rate": 3.630128883556938e-06, "loss": 0.7627, "step": 10790 }, { "epoch": 0.7346314088938015, "grad_norm": 0.25564873218536377, "learning_rate": 3.6128045192738503e-06, "loss": 0.7571, "step": 10800 }, { "epoch": 0.7353116231612958, "grad_norm": 0.2462102174758911, "learning_rate": 3.5955124745697545e-06, "loss": 0.7675, "step": 10810 }, { "epoch": 0.73599183742879, "grad_norm": 0.29949715733528137, "learning_rate": 3.578252836943398e-06, "loss": 0.7658, "step": 10820 }, { "epoch": 0.7366720516962844, "grad_norm": 0.2516492009162903, "learning_rate": 3.561025693729545e-06, "loss": 0.7717, "step": 10830 }, { "epoch": 0.7373522659637786, "grad_norm": 0.2486589401960373, "learning_rate": 3.5438311320985397e-06, "loss": 0.7645, "step": 10840 }, { "epoch": 0.7380324802312729, "grad_norm": 0.24454742670059204, "learning_rate": 3.526669239055859e-06, "loss": 0.7699, "step": 10850 }, { "epoch": 0.7387126944987671, "grad_norm": 0.24663999676704407, "learning_rate": 3.5095401014416753e-06, "loss": 0.7823, "step": 10860 }, { "epoch": 0.7393929087662614, "grad_norm": 0.24899105727672577, "learning_rate": 3.4924438059304168e-06, "loss": 0.7652, "step": 10870 }, { "epoch": 0.7400731230337556, "grad_norm": 0.24318116903305054, "learning_rate": 3.475380439030328e-06, "loss": 0.7585, "step": 10880 }, { "epoch": 0.7407533373012499, "grad_norm": 0.24379071593284607, "learning_rate": 3.4583500870830356e-06, "loss": 0.7751, "step": 10890 }, { "epoch": 0.7414335515687441, "grad_norm": 0.24555222690105438, "learning_rate": 3.4413528362631043e-06, "loss": 0.7516, "step": 10900 }, { "epoch": 0.7421137658362384, "grad_norm": 0.2604420781135559, "learning_rate": 3.4243887725776093e-06, "loss": 0.7675, "step": 10910 }, { "epoch": 0.7427939801037327, "grad_norm": 0.25211283564567566, "learning_rate": 3.4074579818656972e-06, "loss": 0.7617, "step": 10920 }, { "epoch": 0.743474194371227, "grad_norm": 0.24558304250240326, "learning_rate": 3.390560549798143e-06, "loss": 0.7775, "step": 10930 }, { "epoch": 0.7441544086387212, "grad_norm": 0.25397711992263794, "learning_rate": 3.373696561876936e-06, "loss": 0.7494, "step": 10940 }, { "epoch": 0.7448346229062155, "grad_norm": 0.24445897340774536, "learning_rate": 3.3568661034348303e-06, "loss": 0.766, "step": 10950 }, { "epoch": 0.7455148371737097, "grad_norm": 0.24605581164360046, "learning_rate": 3.3400692596349206e-06, "loss": 0.7508, "step": 10960 }, { "epoch": 0.746195051441204, "grad_norm": 0.24277006089687347, "learning_rate": 3.3233061154702086e-06, "loss": 0.7616, "step": 10970 }, { "epoch": 0.7468752657086982, "grad_norm": 0.2509525418281555, "learning_rate": 3.3065767557631757e-06, "loss": 0.7653, "step": 10980 }, { "epoch": 0.7475554799761925, "grad_norm": 0.2534981071949005, "learning_rate": 3.289881265165349e-06, "loss": 0.7752, "step": 10990 }, { "epoch": 0.7482356942436867, "grad_norm": 0.24551300704479218, "learning_rate": 3.273219728156879e-06, "loss": 0.7719, "step": 11000 }, { "epoch": 0.7489159085111811, "grad_norm": 0.2448212206363678, "learning_rate": 3.256592229046108e-06, "loss": 0.7564, "step": 11010 }, { "epoch": 0.7495961227786753, "grad_norm": 0.254169762134552, "learning_rate": 3.239998851969144e-06, "loss": 0.7488, "step": 11020 }, { "epoch": 0.7502763370461696, "grad_norm": 0.24657952785491943, "learning_rate": 3.2234396808894353e-06, "loss": 0.7653, "step": 11030 }, { "epoch": 0.7509565513136638, "grad_norm": 0.2531900107860565, "learning_rate": 3.2069147995973473e-06, "loss": 0.7481, "step": 11040 }, { "epoch": 0.7516367655811581, "grad_norm": 0.25895965099334717, "learning_rate": 3.190424291709735e-06, "loss": 0.7577, "step": 11050 }, { "epoch": 0.7523169798486523, "grad_norm": 0.25525858998298645, "learning_rate": 3.173968240669525e-06, "loss": 0.7679, "step": 11060 }, { "epoch": 0.7529971941161466, "grad_norm": 0.24357037246227264, "learning_rate": 3.1575467297452868e-06, "loss": 0.7567, "step": 11070 }, { "epoch": 0.7536774083836408, "grad_norm": 0.24586918950080872, "learning_rate": 3.1411598420308175e-06, "loss": 0.7601, "step": 11080 }, { "epoch": 0.7543576226511352, "grad_norm": 0.2497064471244812, "learning_rate": 3.1248076604447177e-06, "loss": 0.7553, "step": 11090 }, { "epoch": 0.7550378369186294, "grad_norm": 0.2547801434993744, "learning_rate": 3.1084902677299733e-06, "loss": 0.769, "step": 11100 }, { "epoch": 0.7557180511861237, "grad_norm": 0.25114110112190247, "learning_rate": 3.092207746453535e-06, "loss": 0.7658, "step": 11110 }, { "epoch": 0.7563982654536179, "grad_norm": 0.2509963810443878, "learning_rate": 3.075960179005906e-06, "loss": 0.7631, "step": 11120 }, { "epoch": 0.7570784797211122, "grad_norm": 0.24080003798007965, "learning_rate": 3.0597476476007148e-06, "loss": 0.7514, "step": 11130 }, { "epoch": 0.7577586939886064, "grad_norm": 0.23864655196666718, "learning_rate": 3.0435702342743122e-06, "loss": 0.7471, "step": 11140 }, { "epoch": 0.7584389082561007, "grad_norm": 0.24954387545585632, "learning_rate": 3.0274280208853455e-06, "loss": 0.7598, "step": 11150 }, { "epoch": 0.7591191225235949, "grad_norm": 0.2520304322242737, "learning_rate": 3.0113210891143486e-06, "loss": 0.7677, "step": 11160 }, { "epoch": 0.7597993367910892, "grad_norm": 0.23881210386753082, "learning_rate": 2.9952495204633293e-06, "loss": 0.7517, "step": 11170 }, { "epoch": 0.7604795510585834, "grad_norm": 0.24030640721321106, "learning_rate": 2.979213396255356e-06, "loss": 0.7669, "step": 11180 }, { "epoch": 0.7611597653260778, "grad_norm": 0.24254277348518372, "learning_rate": 2.963212797634145e-06, "loss": 0.7777, "step": 11190 }, { "epoch": 0.761839979593572, "grad_norm": 0.2521003186702728, "learning_rate": 2.947247805563652e-06, "loss": 0.7735, "step": 11200 }, { "epoch": 0.7625201938610663, "grad_norm": 0.24352481961250305, "learning_rate": 2.9313185008276634e-06, "loss": 0.76, "step": 11210 }, { "epoch": 0.7632004081285605, "grad_norm": 0.24368160963058472, "learning_rate": 2.9154249640293806e-06, "loss": 0.7769, "step": 11220 }, { "epoch": 0.7638806223960548, "grad_norm": 0.2543738782405853, "learning_rate": 2.8995672755910208e-06, "loss": 0.7659, "step": 11230 }, { "epoch": 0.764560836663549, "grad_norm": 0.24566735327243805, "learning_rate": 2.883745515753408e-06, "loss": 0.7739, "step": 11240 }, { "epoch": 0.7652410509310433, "grad_norm": 0.24797692894935608, "learning_rate": 2.86795976457556e-06, "loss": 0.7712, "step": 11250 }, { "epoch": 0.7659212651985375, "grad_norm": 0.252880334854126, "learning_rate": 2.852210101934294e-06, "loss": 0.7662, "step": 11260 }, { "epoch": 0.7666014794660319, "grad_norm": 0.26727163791656494, "learning_rate": 2.836496607523813e-06, "loss": 0.7647, "step": 11270 }, { "epoch": 0.7672816937335261, "grad_norm": 0.2518885135650635, "learning_rate": 2.8208193608553115e-06, "loss": 0.7589, "step": 11280 }, { "epoch": 0.7679619080010204, "grad_norm": 0.25065723061561584, "learning_rate": 2.805178441256564e-06, "loss": 0.7597, "step": 11290 }, { "epoch": 0.7686421222685146, "grad_norm": 0.2463270127773285, "learning_rate": 2.789573927871528e-06, "loss": 0.7574, "step": 11300 }, { "epoch": 0.7693223365360088, "grad_norm": 0.254273921251297, "learning_rate": 2.7740058996599475e-06, "loss": 0.7617, "step": 11310 }, { "epoch": 0.7700025508035031, "grad_norm": 0.2485022246837616, "learning_rate": 2.758474435396944e-06, "loss": 0.7761, "step": 11320 }, { "epoch": 0.7706827650709973, "grad_norm": 0.23881711065769196, "learning_rate": 2.7429796136726283e-06, "loss": 0.7586, "step": 11330 }, { "epoch": 0.7713629793384916, "grad_norm": 0.24466168880462646, "learning_rate": 2.7275215128916943e-06, "loss": 0.7641, "step": 11340 }, { "epoch": 0.7720431936059858, "grad_norm": 0.24188317358493805, "learning_rate": 2.712100211273028e-06, "loss": 0.7547, "step": 11350 }, { "epoch": 0.7727234078734802, "grad_norm": 0.3558238744735718, "learning_rate": 2.6967157868493066e-06, "loss": 0.758, "step": 11360 }, { "epoch": 0.7734036221409744, "grad_norm": 0.24949957430362701, "learning_rate": 2.6813683174666106e-06, "loss": 0.7696, "step": 11370 }, { "epoch": 0.7740838364084687, "grad_norm": 0.25859785079956055, "learning_rate": 2.666057880784024e-06, "loss": 0.7612, "step": 11380 }, { "epoch": 0.7747640506759629, "grad_norm": 0.24926084280014038, "learning_rate": 2.650784554273241e-06, "loss": 0.751, "step": 11390 }, { "epoch": 0.7754442649434572, "grad_norm": 0.24230580031871796, "learning_rate": 2.63554841521818e-06, "loss": 0.7622, "step": 11400 }, { "epoch": 0.7761244792109514, "grad_norm": 0.24734055995941162, "learning_rate": 2.620349540714586e-06, "loss": 0.7696, "step": 11410 }, { "epoch": 0.7768046934784457, "grad_norm": 0.24714922904968262, "learning_rate": 2.6051880076696446e-06, "loss": 0.7726, "step": 11420 }, { "epoch": 0.7774849077459399, "grad_norm": 0.2531922161579132, "learning_rate": 2.59006389280159e-06, "loss": 0.7703, "step": 11430 }, { "epoch": 0.7781651220134342, "grad_norm": 0.24530744552612305, "learning_rate": 2.5749772726393198e-06, "loss": 0.7623, "step": 11440 }, { "epoch": 0.7788453362809284, "grad_norm": 0.23901546001434326, "learning_rate": 2.559928223522006e-06, "loss": 0.7695, "step": 11450 }, { "epoch": 0.7795255505484228, "grad_norm": 0.24316571652889252, "learning_rate": 2.5449168215987074e-06, "loss": 0.7451, "step": 11460 }, { "epoch": 0.780205764815917, "grad_norm": 0.2481565922498703, "learning_rate": 2.5299431428279884e-06, "loss": 0.7626, "step": 11470 }, { "epoch": 0.7808859790834113, "grad_norm": 0.24083758890628815, "learning_rate": 2.5150072629775314e-06, "loss": 0.7717, "step": 11480 }, { "epoch": 0.7815661933509055, "grad_norm": 0.24756860733032227, "learning_rate": 2.500109257623751e-06, "loss": 0.7647, "step": 11490 }, { "epoch": 0.7822464076183998, "grad_norm": 0.24836762249469757, "learning_rate": 2.485249202151417e-06, "loss": 0.7684, "step": 11500 }, { "epoch": 0.782926621885894, "grad_norm": 0.24777284264564514, "learning_rate": 2.4704271717532724e-06, "loss": 0.7641, "step": 11510 }, { "epoch": 0.7836068361533883, "grad_norm": 0.2474483698606491, "learning_rate": 2.4556432414296485e-06, "loss": 0.7708, "step": 11520 }, { "epoch": 0.7842870504208825, "grad_norm": 0.24180737137794495, "learning_rate": 2.4408974859880884e-06, "loss": 0.7556, "step": 11530 }, { "epoch": 0.7849672646883769, "grad_norm": 0.24215582013130188, "learning_rate": 2.4261899800429702e-06, "loss": 0.7626, "step": 11540 }, { "epoch": 0.7856474789558711, "grad_norm": 0.2542628347873688, "learning_rate": 2.4115207980151225e-06, "loss": 0.7549, "step": 11550 }, { "epoch": 0.7863276932233654, "grad_norm": 0.244821697473526, "learning_rate": 2.396890014131459e-06, "loss": 0.7607, "step": 11560 }, { "epoch": 0.7870079074908596, "grad_norm": 0.2469116747379303, "learning_rate": 2.382297702424591e-06, "loss": 0.7712, "step": 11570 }, { "epoch": 0.7876881217583539, "grad_norm": 0.24981118738651276, "learning_rate": 2.3677439367324628e-06, "loss": 0.7527, "step": 11580 }, { "epoch": 0.7883683360258481, "grad_norm": 0.24756966531276703, "learning_rate": 2.353228790697969e-06, "loss": 0.7731, "step": 11590 }, { "epoch": 0.7890485502933424, "grad_norm": 0.2556615173816681, "learning_rate": 2.3387523377685915e-06, "loss": 0.7545, "step": 11600 }, { "epoch": 0.7897287645608366, "grad_norm": 0.25671324133872986, "learning_rate": 2.324314651196019e-06, "loss": 0.7656, "step": 11610 }, { "epoch": 0.7904089788283309, "grad_norm": 0.2505514919757843, "learning_rate": 2.3099158040357827e-06, "loss": 0.7579, "step": 11620 }, { "epoch": 0.7910891930958251, "grad_norm": 0.2539149820804596, "learning_rate": 2.2955558691468827e-06, "loss": 0.7674, "step": 11630 }, { "epoch": 0.7917694073633195, "grad_norm": 0.24224653840065002, "learning_rate": 2.2812349191914197e-06, "loss": 0.7535, "step": 11640 }, { "epoch": 0.7924496216308137, "grad_norm": 0.24667827785015106, "learning_rate": 2.266953026634231e-06, "loss": 0.7617, "step": 11650 }, { "epoch": 0.793129835898308, "grad_norm": 0.5902561545372009, "learning_rate": 2.252710263742519e-06, "loss": 0.7465, "step": 11660 }, { "epoch": 0.7938100501658022, "grad_norm": 0.2432839274406433, "learning_rate": 2.23850670258549e-06, "loss": 0.756, "step": 11670 }, { "epoch": 0.7944902644332965, "grad_norm": 0.2495686411857605, "learning_rate": 2.2243424150339853e-06, "loss": 0.7635, "step": 11680 }, { "epoch": 0.7951704787007907, "grad_norm": 0.24666094779968262, "learning_rate": 2.210217472760121e-06, "loss": 0.7577, "step": 11690 }, { "epoch": 0.795850692968285, "grad_norm": 0.25573796033859253, "learning_rate": 2.1961319472369214e-06, "loss": 0.7694, "step": 11700 }, { "epoch": 0.7965309072357792, "grad_norm": 0.24457398056983948, "learning_rate": 2.182085909737961e-06, "loss": 0.7485, "step": 11710 }, { "epoch": 0.7972111215032736, "grad_norm": 0.24250029027462006, "learning_rate": 2.1680794313370044e-06, "loss": 0.7612, "step": 11720 }, { "epoch": 0.7978913357707678, "grad_norm": 0.2539270222187042, "learning_rate": 2.1541125829076436e-06, "loss": 0.7544, "step": 11730 }, { "epoch": 0.7985715500382621, "grad_norm": 0.2482372373342514, "learning_rate": 2.140185435122939e-06, "loss": 0.7616, "step": 11740 }, { "epoch": 0.7992517643057563, "grad_norm": 0.2496422976255417, "learning_rate": 2.126298058455066e-06, "loss": 0.7509, "step": 11750 }, { "epoch": 0.7999319785732506, "grad_norm": 0.24733223021030426, "learning_rate": 2.1124505231749537e-06, "loss": 0.7622, "step": 11760 }, { "epoch": 0.8006121928407448, "grad_norm": 0.2519744038581848, "learning_rate": 2.0986428993519324e-06, "loss": 0.7608, "step": 11770 }, { "epoch": 0.8012924071082391, "grad_norm": 0.250882089138031, "learning_rate": 2.0848752568533804e-06, "loss": 0.7626, "step": 11780 }, { "epoch": 0.8019726213757333, "grad_norm": 0.2523823380470276, "learning_rate": 2.071147665344363e-06, "loss": 0.7615, "step": 11790 }, { "epoch": 0.8026528356432276, "grad_norm": 0.252625972032547, "learning_rate": 2.0574601942872895e-06, "loss": 0.7655, "step": 11800 }, { "epoch": 0.8033330499107219, "grad_norm": 0.2436279058456421, "learning_rate": 2.0438129129415564e-06, "loss": 0.7458, "step": 11810 }, { "epoch": 0.8040132641782162, "grad_norm": 0.24941012263298035, "learning_rate": 2.030205890363197e-06, "loss": 0.7644, "step": 11820 }, { "epoch": 0.8046934784457104, "grad_norm": 0.24483945965766907, "learning_rate": 2.016639195404534e-06, "loss": 0.7511, "step": 11830 }, { "epoch": 0.8053736927132047, "grad_norm": 0.2422882467508316, "learning_rate": 2.003112896713829e-06, "loss": 0.7584, "step": 11840 }, { "epoch": 0.8060539069806989, "grad_norm": 0.246101513504982, "learning_rate": 1.9896270627349367e-06, "loss": 0.7698, "step": 11850 }, { "epoch": 0.8067341212481932, "grad_norm": 0.24442127346992493, "learning_rate": 1.976181761706959e-06, "loss": 0.7584, "step": 11860 }, { "epoch": 0.8074143355156874, "grad_norm": 0.24296994507312775, "learning_rate": 1.9627770616638953e-06, "loss": 0.7692, "step": 11870 }, { "epoch": 0.8080945497831817, "grad_norm": 0.24183446168899536, "learning_rate": 1.9494130304343064e-06, "loss": 0.748, "step": 11880 }, { "epoch": 0.8087747640506759, "grad_norm": 0.24936461448669434, "learning_rate": 1.9360897356409627e-06, "loss": 0.7562, "step": 11890 }, { "epoch": 0.8094549783181703, "grad_norm": 0.25846680998802185, "learning_rate": 1.9228072447005086e-06, "loss": 0.7644, "step": 11900 }, { "epoch": 0.8101351925856645, "grad_norm": 0.2411743700504303, "learning_rate": 1.9095656248231167e-06, "loss": 0.749, "step": 11910 }, { "epoch": 0.8108154068531588, "grad_norm": 0.24309656023979187, "learning_rate": 1.896364943012149e-06, "loss": 0.7574, "step": 11920 }, { "epoch": 0.811495621120653, "grad_norm": 0.24680094420909882, "learning_rate": 1.8832052660638223e-06, "loss": 0.7683, "step": 11930 }, { "epoch": 0.8121758353881473, "grad_norm": 0.2567564845085144, "learning_rate": 1.8700866605668622e-06, "loss": 0.7664, "step": 11940 }, { "epoch": 0.8128560496556415, "grad_norm": 0.24540911614894867, "learning_rate": 1.8570091929021738e-06, "loss": 0.7622, "step": 11950 }, { "epoch": 0.8135362639231358, "grad_norm": 0.24130462110042572, "learning_rate": 1.843972929242499e-06, "loss": 0.7405, "step": 11960 }, { "epoch": 0.81421647819063, "grad_norm": 0.23911410570144653, "learning_rate": 1.8309779355520885e-06, "loss": 0.7724, "step": 11970 }, { "epoch": 0.8148966924581243, "grad_norm": 0.24407736957073212, "learning_rate": 1.818024277586361e-06, "loss": 0.7581, "step": 11980 }, { "epoch": 0.8155769067256186, "grad_norm": 0.2502209544181824, "learning_rate": 1.8051120208915774e-06, "loss": 0.7427, "step": 11990 }, { "epoch": 0.8162571209931129, "grad_norm": 0.252231627702713, "learning_rate": 1.7922412308045035e-06, "loss": 0.7646, "step": 12000 }, { "epoch": 0.8169373352606071, "grad_norm": 0.2540828585624695, "learning_rate": 1.7794119724520832e-06, "loss": 0.7605, "step": 12010 }, { "epoch": 0.8176175495281014, "grad_norm": 0.24380894005298615, "learning_rate": 1.766624310751106e-06, "loss": 0.7566, "step": 12020 }, { "epoch": 0.8182977637955956, "grad_norm": 0.24279260635375977, "learning_rate": 1.7538783104078804e-06, "loss": 0.7372, "step": 12030 }, { "epoch": 0.8189779780630899, "grad_norm": 0.24932003021240234, "learning_rate": 1.7411740359179075e-06, "loss": 0.7704, "step": 12040 }, { "epoch": 0.8196581923305841, "grad_norm": 0.24557797610759735, "learning_rate": 1.7285115515655492e-06, "loss": 0.7509, "step": 12050 }, { "epoch": 0.8203384065980784, "grad_norm": 0.2418263554573059, "learning_rate": 1.715890921423713e-06, "loss": 0.7537, "step": 12060 }, { "epoch": 0.8210186208655726, "grad_norm": 0.25545066595077515, "learning_rate": 1.703312209353516e-06, "loss": 0.7598, "step": 12070 }, { "epoch": 0.821698835133067, "grad_norm": 0.24509641528129578, "learning_rate": 1.69077547900397e-06, "loss": 0.7559, "step": 12080 }, { "epoch": 0.8223790494005612, "grad_norm": 0.24777643382549286, "learning_rate": 1.6782807938116564e-06, "loss": 0.7566, "step": 12090 }, { "epoch": 0.8230592636680555, "grad_norm": 0.24264010787010193, "learning_rate": 1.665828217000407e-06, "loss": 0.771, "step": 12100 }, { "epoch": 0.8237394779355497, "grad_norm": 0.24878935515880585, "learning_rate": 1.6534178115809795e-06, "loss": 0.7517, "step": 12110 }, { "epoch": 0.824419692203044, "grad_norm": 0.25836506485939026, "learning_rate": 1.6410496403507459e-06, "loss": 0.7672, "step": 12120 }, { "epoch": 0.8250999064705382, "grad_norm": 0.24952253699302673, "learning_rate": 1.6287237658933685e-06, "loss": 0.7655, "step": 12130 }, { "epoch": 0.8257801207380325, "grad_norm": 0.2452516406774521, "learning_rate": 1.616440250578485e-06, "loss": 0.77, "step": 12140 }, { "epoch": 0.8264603350055267, "grad_norm": 0.25019338726997375, "learning_rate": 1.6041991565613935e-06, "loss": 0.7571, "step": 12150 }, { "epoch": 0.827140549273021, "grad_norm": 0.24410732090473175, "learning_rate": 1.5920005457827403e-06, "loss": 0.7615, "step": 12160 }, { "epoch": 0.8278207635405153, "grad_norm": 0.25065693259239197, "learning_rate": 1.5798444799682023e-06, "loss": 0.7517, "step": 12170 }, { "epoch": 0.8285009778080096, "grad_norm": 0.2452399581670761, "learning_rate": 1.5677310206281748e-06, "loss": 0.7664, "step": 12180 }, { "epoch": 0.8291811920755038, "grad_norm": 0.24665020406246185, "learning_rate": 1.555660229057465e-06, "loss": 0.7487, "step": 12190 }, { "epoch": 0.8298614063429981, "grad_norm": 0.24330231547355652, "learning_rate": 1.5436321663349763e-06, "loss": 0.7725, "step": 12200 }, { "epoch": 0.8305416206104923, "grad_norm": 0.255862295627594, "learning_rate": 1.5316468933234042e-06, "loss": 0.7628, "step": 12210 }, { "epoch": 0.8312218348779866, "grad_norm": 0.24069169163703918, "learning_rate": 1.5197044706689211e-06, "loss": 0.7448, "step": 12220 }, { "epoch": 0.8319020491454808, "grad_norm": 0.24543890357017517, "learning_rate": 1.5078049588008802e-06, "loss": 0.7637, "step": 12230 }, { "epoch": 0.832582263412975, "grad_norm": 0.26199522614479065, "learning_rate": 1.4959484179314988e-06, "loss": 0.7701, "step": 12240 }, { "epoch": 0.8332624776804693, "grad_norm": 0.2520271837711334, "learning_rate": 1.4841349080555612e-06, "loss": 0.7724, "step": 12250 }, { "epoch": 0.8339426919479636, "grad_norm": 0.24712850153446198, "learning_rate": 1.472364488950111e-06, "loss": 0.7597, "step": 12260 }, { "epoch": 0.8346229062154579, "grad_norm": 0.25130292773246765, "learning_rate": 1.4606372201741504e-06, "loss": 0.7623, "step": 12270 }, { "epoch": 0.8353031204829521, "grad_norm": 0.2628088891506195, "learning_rate": 1.4489531610683382e-06, "loss": 0.7531, "step": 12280 }, { "epoch": 0.8359833347504464, "grad_norm": 0.24563273787498474, "learning_rate": 1.4373123707546921e-06, "loss": 0.7542, "step": 12290 }, { "epoch": 0.8366635490179406, "grad_norm": 0.24699704349040985, "learning_rate": 1.425714908136282e-06, "loss": 0.7755, "step": 12300 }, { "epoch": 0.8373437632854349, "grad_norm": 0.24536138772964478, "learning_rate": 1.4141608318969425e-06, "loss": 0.764, "step": 12310 }, { "epoch": 0.8380239775529291, "grad_norm": 0.24678239226341248, "learning_rate": 1.4026502005009678e-06, "loss": 0.7652, "step": 12320 }, { "epoch": 0.8387041918204234, "grad_norm": 0.243879035115242, "learning_rate": 1.3911830721928187e-06, "loss": 0.7596, "step": 12330 }, { "epoch": 0.8393844060879176, "grad_norm": 0.24049773812294006, "learning_rate": 1.3797595049968305e-06, "loss": 0.7662, "step": 12340 }, { "epoch": 0.840064620355412, "grad_norm": 0.2362724244594574, "learning_rate": 1.3683795567169122e-06, "loss": 0.7542, "step": 12350 }, { "epoch": 0.8407448346229062, "grad_norm": 0.252137690782547, "learning_rate": 1.3570432849362626e-06, "loss": 0.7526, "step": 12360 }, { "epoch": 0.8414250488904005, "grad_norm": 0.2419818639755249, "learning_rate": 1.3457507470170728e-06, "loss": 0.7488, "step": 12370 }, { "epoch": 0.8421052631578947, "grad_norm": 0.2455890029668808, "learning_rate": 1.3345020001002385e-06, "loss": 0.7524, "step": 12380 }, { "epoch": 0.842785477425389, "grad_norm": 0.2423350065946579, "learning_rate": 1.3232971011050711e-06, "loss": 0.7573, "step": 12390 }, { "epoch": 0.8434656916928832, "grad_norm": 0.24948550760746002, "learning_rate": 1.3121361067290072e-06, "loss": 0.7668, "step": 12400 }, { "epoch": 0.8441459059603775, "grad_norm": 0.24597923457622528, "learning_rate": 1.3010190734473248e-06, "loss": 0.7516, "step": 12410 }, { "epoch": 0.8448261202278717, "grad_norm": 0.25187960267066956, "learning_rate": 1.2899460575128586e-06, "loss": 0.761, "step": 12420 }, { "epoch": 0.845506334495366, "grad_norm": 0.24187469482421875, "learning_rate": 1.2789171149557066e-06, "loss": 0.7577, "step": 12430 }, { "epoch": 0.8461865487628603, "grad_norm": 0.24686579406261444, "learning_rate": 1.2679323015829591e-06, "loss": 0.7494, "step": 12440 }, { "epoch": 0.8468667630303546, "grad_norm": 0.24481099843978882, "learning_rate": 1.2569916729784082e-06, "loss": 0.7539, "step": 12450 }, { "epoch": 0.8475469772978488, "grad_norm": 0.24312631785869598, "learning_rate": 1.2460952845022667e-06, "loss": 0.7723, "step": 12460 }, { "epoch": 0.8482271915653431, "grad_norm": 0.24266554415225983, "learning_rate": 1.2352431912908914e-06, "loss": 0.7552, "step": 12470 }, { "epoch": 0.8489074058328373, "grad_norm": 0.24375444650650024, "learning_rate": 1.2244354482565035e-06, "loss": 0.7617, "step": 12480 }, { "epoch": 0.8495876201003316, "grad_norm": 0.24153859913349152, "learning_rate": 1.2136721100869075e-06, "loss": 0.7582, "step": 12490 }, { "epoch": 0.8502678343678258, "grad_norm": 0.24547694623470306, "learning_rate": 1.2029532312452186e-06, "loss": 0.7741, "step": 12500 }, { "epoch": 0.8509480486353201, "grad_norm": 0.25353655219078064, "learning_rate": 1.1922788659695839e-06, "loss": 0.758, "step": 12510 }, { "epoch": 0.8516282629028143, "grad_norm": 0.24620665609836578, "learning_rate": 1.1816490682729097e-06, "loss": 0.7627, "step": 12520 }, { "epoch": 0.8523084771703087, "grad_norm": 0.2532989978790283, "learning_rate": 1.1710638919425887e-06, "loss": 0.7636, "step": 12530 }, { "epoch": 0.8529886914378029, "grad_norm": 0.24980738759040833, "learning_rate": 1.1605233905402258e-06, "loss": 0.7671, "step": 12540 }, { "epoch": 0.8536689057052972, "grad_norm": 0.23702862858772278, "learning_rate": 1.150027617401368e-06, "loss": 0.7698, "step": 12550 }, { "epoch": 0.8543491199727914, "grad_norm": 0.24570266902446747, "learning_rate": 1.1395766256352348e-06, "loss": 0.7594, "step": 12560 }, { "epoch": 0.8550293342402857, "grad_norm": 0.24833616614341736, "learning_rate": 1.1291704681244498e-06, "loss": 0.7583, "step": 12570 }, { "epoch": 0.8557095485077799, "grad_norm": 0.25779956579208374, "learning_rate": 1.118809197524774e-06, "loss": 0.7553, "step": 12580 }, { "epoch": 0.8563897627752742, "grad_norm": 0.2411714643239975, "learning_rate": 1.1084928662648352e-06, "loss": 0.7624, "step": 12590 }, { "epoch": 0.8570699770427684, "grad_norm": 0.24922004342079163, "learning_rate": 1.0982215265458683e-06, "loss": 0.7614, "step": 12600 }, { "epoch": 0.8577501913102628, "grad_norm": 0.24543525278568268, "learning_rate": 1.0879952303414454e-06, "loss": 0.7632, "step": 12610 }, { "epoch": 0.858430405577757, "grad_norm": 0.24459443986415863, "learning_rate": 1.0778140293972183e-06, "loss": 0.7583, "step": 12620 }, { "epoch": 0.8591106198452513, "grad_norm": 0.2454974502325058, "learning_rate": 1.0676779752306532e-06, "loss": 0.7566, "step": 12630 }, { "epoch": 0.8597908341127455, "grad_norm": 0.24637340009212494, "learning_rate": 1.0575871191307707e-06, "loss": 0.7587, "step": 12640 }, { "epoch": 0.8604710483802398, "grad_norm": 0.2507196068763733, "learning_rate": 1.047541512157888e-06, "loss": 0.7496, "step": 12650 }, { "epoch": 0.861151262647734, "grad_norm": 0.24969299137592316, "learning_rate": 1.0375412051433575e-06, "loss": 0.7478, "step": 12660 }, { "epoch": 0.8618314769152283, "grad_norm": 0.24901236593723297, "learning_rate": 1.0275862486893129e-06, "loss": 0.7587, "step": 12670 }, { "epoch": 0.8625116911827225, "grad_norm": 0.2548344135284424, "learning_rate": 1.0176766931684078e-06, "loss": 0.7506, "step": 12680 }, { "epoch": 0.8631919054502168, "grad_norm": 0.24074122309684753, "learning_rate": 1.007812588723569e-06, "loss": 0.7605, "step": 12690 }, { "epoch": 0.863872119717711, "grad_norm": 0.2506800889968872, "learning_rate": 9.979939852677356e-07, "loss": 0.7688, "step": 12700 }, { "epoch": 0.8645523339852054, "grad_norm": 0.2466941773891449, "learning_rate": 9.88220932483609e-07, "loss": 0.7667, "step": 12710 }, { "epoch": 0.8652325482526996, "grad_norm": 0.2489737868309021, "learning_rate": 9.784934798234037e-07, "loss": 0.7507, "step": 12720 }, { "epoch": 0.8659127625201939, "grad_norm": 0.2487376183271408, "learning_rate": 9.68811676508592e-07, "loss": 0.7506, "step": 12730 }, { "epoch": 0.8665929767876881, "grad_norm": 0.24357005953788757, "learning_rate": 9.591755715296601e-07, "loss": 0.7502, "step": 12740 }, { "epoch": 0.8672731910551824, "grad_norm": 0.23553359508514404, "learning_rate": 9.495852136458572e-07, "loss": 0.7688, "step": 12750 }, { "epoch": 0.8679534053226766, "grad_norm": 0.25732094049453735, "learning_rate": 9.400406513849491e-07, "loss": 0.7543, "step": 12760 }, { "epoch": 0.8686336195901709, "grad_norm": 0.24797244369983673, "learning_rate": 9.305419330429743e-07, "loss": 0.772, "step": 12770 }, { "epoch": 0.8693138338576651, "grad_norm": 0.2497953623533249, "learning_rate": 9.210891066839966e-07, "loss": 0.7647, "step": 12780 }, { "epoch": 0.8699940481251595, "grad_norm": 0.2506360113620758, "learning_rate": 9.116822201398657e-07, "loss": 0.7741, "step": 12790 }, { "epoch": 0.8706742623926537, "grad_norm": 0.24232223629951477, "learning_rate": 9.023213210099712e-07, "loss": 0.7718, "step": 12800 }, { "epoch": 0.871354476660148, "grad_norm": 0.25328123569488525, "learning_rate": 8.930064566610052e-07, "loss": 0.7645, "step": 12810 }, { "epoch": 0.8720346909276422, "grad_norm": 0.2413000762462616, "learning_rate": 8.837376742267212e-07, "loss": 0.7623, "step": 12820 }, { "epoch": 0.8727149051951365, "grad_norm": 0.24539200961589813, "learning_rate": 8.745150206076947e-07, "loss": 0.7682, "step": 12830 }, { "epoch": 0.8733951194626307, "grad_norm": 0.23920084536075592, "learning_rate": 8.65338542471087e-07, "loss": 0.7726, "step": 12840 }, { "epoch": 0.874075333730125, "grad_norm": 0.262353777885437, "learning_rate": 8.562082862504096e-07, "loss": 0.7617, "step": 12850 }, { "epoch": 0.8747555479976192, "grad_norm": 0.24785573780536652, "learning_rate": 8.471242981452854e-07, "loss": 0.7559, "step": 12860 }, { "epoch": 0.8754357622651135, "grad_norm": 0.33901122212409973, "learning_rate": 8.380866241212226e-07, "loss": 0.7594, "step": 12870 }, { "epoch": 0.8761159765326078, "grad_norm": 0.24244165420532227, "learning_rate": 8.290953099093746e-07, "loss": 0.7632, "step": 12880 }, { "epoch": 0.8767961908001021, "grad_norm": 0.24455446004867554, "learning_rate": 8.20150401006311e-07, "loss": 0.7587, "step": 12890 }, { "epoch": 0.8774764050675963, "grad_norm": 0.24641729891300201, "learning_rate": 8.112519426737897e-07, "loss": 0.7516, "step": 12900 }, { "epoch": 0.8781566193350906, "grad_norm": 0.24412564933300018, "learning_rate": 8.023999799385252e-07, "loss": 0.7477, "step": 12910 }, { "epoch": 0.8788368336025848, "grad_norm": 0.24342086911201477, "learning_rate": 7.935945575919634e-07, "loss": 0.7478, "step": 12920 }, { "epoch": 0.8795170478700791, "grad_norm": 0.25551480054855347, "learning_rate": 7.848357201900469e-07, "loss": 0.7607, "step": 12930 }, { "epoch": 0.8801972621375733, "grad_norm": 0.24093754589557648, "learning_rate": 7.76123512053002e-07, "loss": 0.7677, "step": 12940 }, { "epoch": 0.8808774764050676, "grad_norm": 0.2472139298915863, "learning_rate": 7.67457977265107e-07, "loss": 0.7578, "step": 12950 }, { "epoch": 0.8815576906725618, "grad_norm": 0.23935164511203766, "learning_rate": 7.588391596744693e-07, "loss": 0.7664, "step": 12960 }, { "epoch": 0.8822379049400562, "grad_norm": 0.2518456280231476, "learning_rate": 7.502671028928043e-07, "loss": 0.7545, "step": 12970 }, { "epoch": 0.8829181192075504, "grad_norm": 0.24603936076164246, "learning_rate": 7.417418502952179e-07, "loss": 0.7616, "step": 12980 }, { "epoch": 0.8835983334750447, "grad_norm": 0.24553045630455017, "learning_rate": 7.332634450199804e-07, "loss": 0.7628, "step": 12990 }, { "epoch": 0.8842785477425389, "grad_norm": 0.2492409199476242, "learning_rate": 7.248319299683126e-07, "loss": 0.7605, "step": 13000 }, { "epoch": 0.8849587620100332, "grad_norm": 0.25094982981681824, "learning_rate": 7.164473478041701e-07, "loss": 0.7599, "step": 13010 }, { "epoch": 0.8856389762775274, "grad_norm": 0.2430187612771988, "learning_rate": 7.08109740954025e-07, "loss": 0.7631, "step": 13020 }, { "epoch": 0.8863191905450217, "grad_norm": 0.2483309805393219, "learning_rate": 6.998191516066488e-07, "loss": 0.7599, "step": 13030 }, { "epoch": 0.8869994048125159, "grad_norm": 0.24521982669830322, "learning_rate": 6.915756217129044e-07, "loss": 0.7664, "step": 13040 }, { "epoch": 0.8876796190800103, "grad_norm": 0.2494121789932251, "learning_rate": 6.833791929855294e-07, "loss": 0.7571, "step": 13050 }, { "epoch": 0.8883598333475045, "grad_norm": 0.25339776277542114, "learning_rate": 6.752299068989309e-07, "loss": 0.7596, "step": 13060 }, { "epoch": 0.8890400476149988, "grad_norm": 0.24048739671707153, "learning_rate": 6.67127804688965e-07, "loss": 0.7606, "step": 13070 }, { "epoch": 0.889720261882493, "grad_norm": 0.24579575657844543, "learning_rate": 6.590729273527407e-07, "loss": 0.7668, "step": 13080 }, { "epoch": 0.8904004761499873, "grad_norm": 0.25283282995224, "learning_rate": 6.510653156484026e-07, "loss": 0.7769, "step": 13090 }, { "epoch": 0.8910806904174815, "grad_norm": 0.2448589950799942, "learning_rate": 6.431050100949298e-07, "loss": 0.7564, "step": 13100 }, { "epoch": 0.8917609046849758, "grad_norm": 0.24270819127559662, "learning_rate": 6.351920509719289e-07, "loss": 0.7629, "step": 13110 }, { "epoch": 0.89244111895247, "grad_norm": 0.24784918129444122, "learning_rate": 6.273264783194311e-07, "loss": 0.7613, "step": 13120 }, { "epoch": 0.8931213332199643, "grad_norm": 0.2509154975414276, "learning_rate": 6.195083319376905e-07, "loss": 0.7609, "step": 13130 }, { "epoch": 0.8938015474874585, "grad_norm": 0.23716795444488525, "learning_rate": 6.117376513869789e-07, "loss": 0.7457, "step": 13140 }, { "epoch": 0.8944817617549529, "grad_norm": 0.2542048394680023, "learning_rate": 6.040144759873911e-07, "loss": 0.7659, "step": 13150 }, { "epoch": 0.8951619760224471, "grad_norm": 0.2590234577655792, "learning_rate": 5.963388448186424e-07, "loss": 0.7558, "step": 13160 }, { "epoch": 0.8958421902899414, "grad_norm": 0.23940609395503998, "learning_rate": 5.887107967198702e-07, "loss": 0.7651, "step": 13170 }, { "epoch": 0.8965224045574356, "grad_norm": 0.24657294154167175, "learning_rate": 5.811303702894389e-07, "loss": 0.7667, "step": 13180 }, { "epoch": 0.8972026188249298, "grad_norm": 0.36665135622024536, "learning_rate": 5.735976038847457e-07, "loss": 0.7738, "step": 13190 }, { "epoch": 0.8978828330924241, "grad_norm": 0.24272586405277252, "learning_rate": 5.661125356220254e-07, "loss": 0.7576, "step": 13200 }, { "epoch": 0.8985630473599183, "grad_norm": 0.23903925716876984, "learning_rate": 5.586752033761566e-07, "loss": 0.7587, "step": 13210 }, { "epoch": 0.8992432616274126, "grad_norm": 0.2517642080783844, "learning_rate": 5.512856447804704e-07, "loss": 0.768, "step": 13220 }, { "epoch": 0.8999234758949068, "grad_norm": 0.24130401015281677, "learning_rate": 5.43943897226562e-07, "loss": 0.7568, "step": 13230 }, { "epoch": 0.9006036901624012, "grad_norm": 0.24629628658294678, "learning_rate": 5.366499978640994e-07, "loss": 0.7722, "step": 13240 }, { "epoch": 0.9012839044298954, "grad_norm": 0.24640581011772156, "learning_rate": 5.294039836006348e-07, "loss": 0.7556, "step": 13250 }, { "epoch": 0.9019641186973897, "grad_norm": 0.2520070970058441, "learning_rate": 5.222058911014194e-07, "loss": 0.7649, "step": 13260 }, { "epoch": 0.9026443329648839, "grad_norm": 0.2524298131465912, "learning_rate": 5.150557567892201e-07, "loss": 0.7546, "step": 13270 }, { "epoch": 0.9033245472323782, "grad_norm": 0.24579280614852905, "learning_rate": 5.079536168441301e-07, "loss": 0.7676, "step": 13280 }, { "epoch": 0.9040047614998724, "grad_norm": 0.24815985560417175, "learning_rate": 5.008995072033873e-07, "loss": 0.7557, "step": 13290 }, { "epoch": 0.9046849757673667, "grad_norm": 0.24681419134140015, "learning_rate": 4.938934635611958e-07, "loss": 0.7564, "step": 13300 }, { "epoch": 0.9053651900348609, "grad_norm": 0.2422610968351364, "learning_rate": 4.869355213685423e-07, "loss": 0.7613, "step": 13310 }, { "epoch": 0.9060454043023553, "grad_norm": 0.2551460862159729, "learning_rate": 4.800257158330179e-07, "loss": 0.7667, "step": 13320 }, { "epoch": 0.9067256185698495, "grad_norm": 0.2425679713487625, "learning_rate": 4.7316408191863674e-07, "loss": 0.7599, "step": 13330 }, { "epoch": 0.9074058328373438, "grad_norm": 0.24537131190299988, "learning_rate": 4.663506543456653e-07, "loss": 0.7559, "step": 13340 }, { "epoch": 0.908086047104838, "grad_norm": 0.2449687421321869, "learning_rate": 4.5958546759044207e-07, "loss": 0.7633, "step": 13350 }, { "epoch": 0.9087662613723323, "grad_norm": 0.24791422486305237, "learning_rate": 4.5286855588520195e-07, "loss": 0.7508, "step": 13360 }, { "epoch": 0.9094464756398265, "grad_norm": 0.25494876503944397, "learning_rate": 4.4619995321790844e-07, "loss": 0.7538, "step": 13370 }, { "epoch": 0.9101266899073208, "grad_norm": 0.24482247233390808, "learning_rate": 4.395796933320773e-07, "loss": 0.7689, "step": 13380 }, { "epoch": 0.910806904174815, "grad_norm": 0.2454451620578766, "learning_rate": 4.330078097266055e-07, "loss": 0.7637, "step": 13390 }, { "epoch": 0.9114871184423093, "grad_norm": 0.2453595995903015, "learning_rate": 4.26484335655607e-07, "loss": 0.7475, "step": 13400 }, { "epoch": 0.9121673327098035, "grad_norm": 0.24535664916038513, "learning_rate": 4.2000930412823605e-07, "loss": 0.7622, "step": 13410 }, { "epoch": 0.9128475469772979, "grad_norm": 0.2451518476009369, "learning_rate": 4.135827479085286e-07, "loss": 0.7575, "step": 13420 }, { "epoch": 0.9135277612447921, "grad_norm": 0.24552904069423676, "learning_rate": 4.072046995152301e-07, "loss": 0.7614, "step": 13430 }, { "epoch": 0.9142079755122864, "grad_norm": 0.2504306137561798, "learning_rate": 4.0087519122163575e-07, "loss": 0.7638, "step": 13440 }, { "epoch": 0.9148881897797806, "grad_norm": 0.24239565432071686, "learning_rate": 3.945942550554238e-07, "loss": 0.7635, "step": 13450 }, { "epoch": 0.9155684040472749, "grad_norm": 0.243515282869339, "learning_rate": 3.8836192279849473e-07, "loss": 0.7525, "step": 13460 }, { "epoch": 0.9162486183147691, "grad_norm": 0.24457940459251404, "learning_rate": 3.821782259868101e-07, "loss": 0.7524, "step": 13470 }, { "epoch": 0.9169288325822634, "grad_norm": 0.24360902607440948, "learning_rate": 3.7604319591023507e-07, "loss": 0.7686, "step": 13480 }, { "epoch": 0.9176090468497576, "grad_norm": 0.24603000283241272, "learning_rate": 3.6995686361237625e-07, "loss": 0.7613, "step": 13490 }, { "epoch": 0.918289261117252, "grad_norm": 0.2504574954509735, "learning_rate": 3.639192598904284e-07, "loss": 0.7537, "step": 13500 }, { "epoch": 0.9189694753847462, "grad_norm": 0.24181614816188812, "learning_rate": 3.579304152950147e-07, "loss": 0.7624, "step": 13510 }, { "epoch": 0.9196496896522405, "grad_norm": 0.24688316881656647, "learning_rate": 3.519903601300367e-07, "loss": 0.7634, "step": 13520 }, { "epoch": 0.9203299039197347, "grad_norm": 0.2513571083545685, "learning_rate": 3.4609912445251693e-07, "loss": 0.744, "step": 13530 }, { "epoch": 0.921010118187229, "grad_norm": 0.2379947006702423, "learning_rate": 3.402567380724486e-07, "loss": 0.7657, "step": 13540 }, { "epoch": 0.9216903324547232, "grad_norm": 0.24739056825637817, "learning_rate": 3.344632305526463e-07, "loss": 0.7682, "step": 13550 }, { "epoch": 0.9223705467222175, "grad_norm": 0.24008318781852722, "learning_rate": 3.2871863120859214e-07, "loss": 0.7713, "step": 13560 }, { "epoch": 0.9230507609897117, "grad_norm": 0.24716557562351227, "learning_rate": 3.230229691082931e-07, "loss": 0.7652, "step": 13570 }, { "epoch": 0.923730975257206, "grad_norm": 0.24023090302944183, "learning_rate": 3.173762730721275e-07, "loss": 0.7695, "step": 13580 }, { "epoch": 0.9244111895247002, "grad_norm": 0.24779115617275238, "learning_rate": 3.117785716727062e-07, "loss": 0.7586, "step": 13590 }, { "epoch": 0.9250914037921946, "grad_norm": 0.24192939698696136, "learning_rate": 3.0622989323472076e-07, "loss": 0.765, "step": 13600 }, { "epoch": 0.9257716180596888, "grad_norm": 0.25075802206993103, "learning_rate": 3.0073026583480547e-07, "loss": 0.764, "step": 13610 }, { "epoch": 0.9264518323271831, "grad_norm": 0.24366381764411926, "learning_rate": 2.952797173013944e-07, "loss": 0.765, "step": 13620 }, { "epoch": 0.9271320465946773, "grad_norm": 0.2407902032136917, "learning_rate": 2.898782752145779e-07, "loss": 0.7531, "step": 13630 }, { "epoch": 0.9278122608621716, "grad_norm": 0.23994383215904236, "learning_rate": 2.845259669059663e-07, "loss": 0.754, "step": 13640 }, { "epoch": 0.9284924751296658, "grad_norm": 0.24762782454490662, "learning_rate": 2.7922281945855e-07, "loss": 0.7682, "step": 13650 }, { "epoch": 0.9291726893971601, "grad_norm": 0.24440613389015198, "learning_rate": 2.739688597065604e-07, "loss": 0.7613, "step": 13660 }, { "epoch": 0.9298529036646543, "grad_norm": 0.24806998670101166, "learning_rate": 2.6876411423534035e-07, "loss": 0.7621, "step": 13670 }, { "epoch": 0.9305331179321487, "grad_norm": 0.24591968953609467, "learning_rate": 2.6360860938120183e-07, "loss": 0.7594, "step": 13680 }, { "epoch": 0.9312133321996429, "grad_norm": 0.2511826157569885, "learning_rate": 2.585023712312973e-07, "loss": 0.7696, "step": 13690 }, { "epoch": 0.9318935464671372, "grad_norm": 0.2496713250875473, "learning_rate": 2.5344542562348974e-07, "loss": 0.7709, "step": 13700 }, { "epoch": 0.9325737607346314, "grad_norm": 0.2496761530637741, "learning_rate": 2.4843779814621496e-07, "loss": 0.7508, "step": 13710 }, { "epoch": 0.9332539750021257, "grad_norm": 0.24858562648296356, "learning_rate": 2.434795141383584e-07, "loss": 0.7638, "step": 13720 }, { "epoch": 0.9339341892696199, "grad_norm": 0.2461564838886261, "learning_rate": 2.3857059868912403e-07, "loss": 0.7679, "step": 13730 }, { "epoch": 0.9346144035371142, "grad_norm": 0.25012731552124023, "learning_rate": 2.3371107663790915e-07, "loss": 0.7547, "step": 13740 }, { "epoch": 0.9352946178046084, "grad_norm": 0.24220655858516693, "learning_rate": 2.2890097257417642e-07, "loss": 0.7612, "step": 13750 }, { "epoch": 0.9359748320721027, "grad_norm": 0.24822300672531128, "learning_rate": 2.241403108373297e-07, "loss": 0.7692, "step": 13760 }, { "epoch": 0.936655046339597, "grad_norm": 0.24931180477142334, "learning_rate": 2.1942911551659506e-07, "loss": 0.7642, "step": 13770 }, { "epoch": 0.9373352606070913, "grad_norm": 0.23831304907798767, "learning_rate": 2.1476741045089233e-07, "loss": 0.7628, "step": 13780 }, { "epoch": 0.9380154748745855, "grad_norm": 0.24508658051490784, "learning_rate": 2.101552192287193e-07, "loss": 0.7631, "step": 13790 }, { "epoch": 0.9386956891420798, "grad_norm": 0.25515463948249817, "learning_rate": 2.0559256518803082e-07, "loss": 0.7509, "step": 13800 }, { "epoch": 0.939375903409574, "grad_norm": 0.23935982584953308, "learning_rate": 2.0107947141612017e-07, "loss": 0.7531, "step": 13810 }, { "epoch": 0.9400561176770683, "grad_norm": 0.24096937477588654, "learning_rate": 1.9661596074950328e-07, "loss": 0.7443, "step": 13820 }, { "epoch": 0.9407363319445625, "grad_norm": 0.2419290691614151, "learning_rate": 1.9220205577380356e-07, "loss": 0.7635, "step": 13830 }, { "epoch": 0.9414165462120568, "grad_norm": 0.2487664669752121, "learning_rate": 1.8783777882363407e-07, "loss": 0.7668, "step": 13840 }, { "epoch": 0.942096760479551, "grad_norm": 0.24574416875839233, "learning_rate": 1.8352315198248983e-07, "loss": 0.7743, "step": 13850 }, { "epoch": 0.9427769747470454, "grad_norm": 0.2422151267528534, "learning_rate": 1.7925819708263347e-07, "loss": 0.7618, "step": 13860 }, { "epoch": 0.9434571890145396, "grad_norm": 0.2462805211544037, "learning_rate": 1.750429357049821e-07, "loss": 0.7595, "step": 13870 }, { "epoch": 0.9441374032820339, "grad_norm": 0.24129988253116608, "learning_rate": 1.7087738917900277e-07, "loss": 0.7594, "step": 13880 }, { "epoch": 0.9448176175495281, "grad_norm": 0.24333396553993225, "learning_rate": 1.6676157858260157e-07, "loss": 0.7543, "step": 13890 }, { "epoch": 0.9454978318170224, "grad_norm": 0.2515409290790558, "learning_rate": 1.6269552474202033e-07, "loss": 0.7598, "step": 13900 }, { "epoch": 0.9461780460845166, "grad_norm": 0.24224993586540222, "learning_rate": 1.5867924823172565e-07, "loss": 0.7595, "step": 13910 }, { "epoch": 0.9468582603520109, "grad_norm": 0.24030137062072754, "learning_rate": 1.5471276937431e-07, "loss": 0.7549, "step": 13920 }, { "epoch": 0.9475384746195051, "grad_norm": 0.2618497312068939, "learning_rate": 1.5079610824038416e-07, "loss": 0.7499, "step": 13930 }, { "epoch": 0.9482186888869995, "grad_norm": 0.25244298577308655, "learning_rate": 1.4692928464848266e-07, "loss": 0.7574, "step": 13940 }, { "epoch": 0.9488989031544937, "grad_norm": 0.2502358555793762, "learning_rate": 1.431123181649552e-07, "loss": 0.7658, "step": 13950 }, { "epoch": 0.949579117421988, "grad_norm": 0.24570995569229126, "learning_rate": 1.3934522810387429e-07, "loss": 0.7608, "step": 13960 }, { "epoch": 0.9502593316894822, "grad_norm": 0.24009110033512115, "learning_rate": 1.356280335269333e-07, "loss": 0.754, "step": 13970 }, { "epoch": 0.9509395459569765, "grad_norm": 0.2586895823478699, "learning_rate": 1.319607532433509e-07, "loss": 0.7631, "step": 13980 }, { "epoch": 0.9516197602244707, "grad_norm": 0.26034995913505554, "learning_rate": 1.283434058097799e-07, "loss": 0.7558, "step": 13990 }, { "epoch": 0.952299974491965, "grad_norm": 0.24782530963420868, "learning_rate": 1.247760095302064e-07, "loss": 0.7628, "step": 14000 }, { "epoch": 0.9529801887594592, "grad_norm": 0.24033108353614807, "learning_rate": 1.2125858245586207e-07, "loss": 0.7767, "step": 14010 }, { "epoch": 0.9536604030269535, "grad_norm": 0.24754656851291656, "learning_rate": 1.1779114238513189e-07, "loss": 0.7616, "step": 14020 }, { "epoch": 0.9543406172944477, "grad_norm": 0.2364259511232376, "learning_rate": 1.1437370686346427e-07, "loss": 0.7622, "step": 14030 }, { "epoch": 0.9550208315619421, "grad_norm": 0.24411220848560333, "learning_rate": 1.1100629318327894e-07, "loss": 0.7598, "step": 14040 }, { "epoch": 0.9557010458294363, "grad_norm": 0.24533218145370483, "learning_rate": 1.0768891838388584e-07, "loss": 0.7563, "step": 14050 }, { "epoch": 0.9563812600969306, "grad_norm": 0.24469947814941406, "learning_rate": 1.0442159925139306e-07, "loss": 0.7602, "step": 14060 }, { "epoch": 0.9570614743644248, "grad_norm": 0.23895448446273804, "learning_rate": 1.0120435231862346e-07, "loss": 0.7565, "step": 14070 }, { "epoch": 0.9577416886319191, "grad_norm": 0.24606747925281525, "learning_rate": 9.803719386503485e-08, "loss": 0.7534, "step": 14080 }, { "epoch": 0.9584219028994133, "grad_norm": 0.2557947635650635, "learning_rate": 9.492013991663107e-08, "loss": 0.7647, "step": 14090 }, { "epoch": 0.9591021171669076, "grad_norm": 0.24558596312999725, "learning_rate": 9.185320624588545e-08, "loss": 0.7526, "step": 14100 }, { "epoch": 0.9597823314344018, "grad_norm": 0.24695099890232086, "learning_rate": 8.883640837166085e-08, "loss": 0.7526, "step": 14110 }, { "epoch": 0.960462545701896, "grad_norm": 0.25338950753211975, "learning_rate": 8.586976155912863e-08, "loss": 0.748, "step": 14120 }, { "epoch": 0.9611427599693904, "grad_norm": 0.2424427717924118, "learning_rate": 8.295328081969312e-08, "loss": 0.7648, "step": 14130 }, { "epoch": 0.9618229742368846, "grad_norm": 0.24449896812438965, "learning_rate": 8.008698091091727e-08, "loss": 0.7526, "step": 14140 }, { "epoch": 0.9625031885043789, "grad_norm": 0.24283750355243683, "learning_rate": 7.72708763364427e-08, "loss": 0.7646, "step": 14150 }, { "epoch": 0.9631834027718731, "grad_norm": 0.2458605021238327, "learning_rate": 7.450498134592198e-08, "loss": 0.7537, "step": 14160 }, { "epoch": 0.9638636170393674, "grad_norm": 0.2411893755197525, "learning_rate": 7.178930993494538e-08, "loss": 0.7512, "step": 14170 }, { "epoch": 0.9645438313068616, "grad_norm": 0.24779365956783295, "learning_rate": 6.912387584496527e-08, "loss": 0.7537, "step": 14180 }, { "epoch": 0.9652240455743559, "grad_norm": 0.25667867064476013, "learning_rate": 6.650869256323522e-08, "loss": 0.7616, "step": 14190 }, { "epoch": 0.9659042598418501, "grad_norm": 0.2500806748867035, "learning_rate": 6.394377332273216e-08, "loss": 0.771, "step": 14200 }, { "epoch": 0.9665844741093444, "grad_norm": 0.2422896921634674, "learning_rate": 6.142913110209537e-08, "loss": 0.7524, "step": 14210 }, { "epoch": 0.9672646883768387, "grad_norm": 0.2571762800216675, "learning_rate": 5.896477862556205e-08, "loss": 0.7581, "step": 14220 }, { "epoch": 0.967944902644333, "grad_norm": 0.2567151188850403, "learning_rate": 5.6550728362896325e-08, "loss": 0.767, "step": 14230 }, { "epoch": 0.9686251169118272, "grad_norm": 0.24308769404888153, "learning_rate": 5.418699252933479e-08, "loss": 0.757, "step": 14240 }, { "epoch": 0.9693053311793215, "grad_norm": 0.2470419555902481, "learning_rate": 5.18735830855166e-08, "loss": 0.763, "step": 14250 }, { "epoch": 0.9699855454468157, "grad_norm": 0.2530462443828583, "learning_rate": 4.9610511737425706e-08, "loss": 0.7682, "step": 14260 }, { "epoch": 0.97066575971431, "grad_norm": 0.2385081797838211, "learning_rate": 4.739778993633648e-08, "loss": 0.7407, "step": 14270 }, { "epoch": 0.9713459739818042, "grad_norm": 0.23911146819591522, "learning_rate": 4.523542887874932e-08, "loss": 0.7549, "step": 14280 }, { "epoch": 0.9720261882492985, "grad_norm": 0.24873803555965424, "learning_rate": 4.312343950633513e-08, "loss": 0.7566, "step": 14290 }, { "epoch": 0.9727064025167927, "grad_norm": 0.2478393018245697, "learning_rate": 4.1061832505883135e-08, "loss": 0.7756, "step": 14300 }, { "epoch": 0.9733866167842871, "grad_norm": 0.23787696659564972, "learning_rate": 3.9050618309243173e-08, "loss": 0.7621, "step": 14310 }, { "epoch": 0.9740668310517813, "grad_norm": 0.24838638305664062, "learning_rate": 3.708980709327681e-08, "loss": 0.7502, "step": 14320 }, { "epoch": 0.9747470453192756, "grad_norm": 0.24176611006259918, "learning_rate": 3.517940877980186e-08, "loss": 0.7471, "step": 14330 }, { "epoch": 0.9754272595867698, "grad_norm": 0.24439072608947754, "learning_rate": 3.331943303554464e-08, "loss": 0.7585, "step": 14340 }, { "epoch": 0.9761074738542641, "grad_norm": 0.24630014598369598, "learning_rate": 3.1509889272089975e-08, "loss": 0.7579, "step": 14350 }, { "epoch": 0.9767876881217583, "grad_norm": 0.24432967603206635, "learning_rate": 2.975078664583353e-08, "loss": 0.7632, "step": 14360 }, { "epoch": 0.9774679023892526, "grad_norm": 0.2476637065410614, "learning_rate": 2.8042134057937322e-08, "loss": 0.7748, "step": 14370 }, { "epoch": 0.9781481166567468, "grad_norm": 0.2439027577638626, "learning_rate": 2.638394015428203e-08, "loss": 0.7567, "step": 14380 }, { "epoch": 0.9788283309242412, "grad_norm": 0.24583202600479126, "learning_rate": 2.477621332542368e-08, "loss": 0.7638, "step": 14390 }, { "epoch": 0.9795085451917354, "grad_norm": 0.24024732410907745, "learning_rate": 2.321896170655591e-08, "loss": 0.7626, "step": 14400 }, { "epoch": 0.9801887594592297, "grad_norm": 0.4252866804599762, "learning_rate": 2.1712193177459985e-08, "loss": 0.7514, "step": 14410 }, { "epoch": 0.9808689737267239, "grad_norm": 0.24549369513988495, "learning_rate": 2.0255915362471513e-08, "loss": 0.7541, "step": 14420 }, { "epoch": 0.9815491879942182, "grad_norm": 0.24518539011478424, "learning_rate": 1.885013563044158e-08, "loss": 0.7499, "step": 14430 }, { "epoch": 0.9822294022617124, "grad_norm": 0.2515625059604645, "learning_rate": 1.749486109469789e-08, "loss": 0.7779, "step": 14440 }, { "epoch": 0.9829096165292067, "grad_norm": 0.23486600816249847, "learning_rate": 1.6190098613005913e-08, "loss": 0.7476, "step": 14450 }, { "epoch": 0.9835898307967009, "grad_norm": 0.24871693551540375, "learning_rate": 1.4935854787541114e-08, "loss": 0.7589, "step": 14460 }, { "epoch": 0.9842700450641952, "grad_norm": 0.24605490267276764, "learning_rate": 1.3732135964849013e-08, "loss": 0.7721, "step": 14470 }, { "epoch": 0.9849502593316894, "grad_norm": 0.24189579486846924, "learning_rate": 1.2578948235817402e-08, "loss": 0.7671, "step": 14480 }, { "epoch": 0.9856304735991838, "grad_norm": 0.24505004286766052, "learning_rate": 1.1476297435641937e-08, "loss": 0.7634, "step": 14490 }, { "epoch": 0.986310687866678, "grad_norm": 0.24500903487205505, "learning_rate": 1.0424189143799502e-08, "loss": 0.7758, "step": 14500 }, { "epoch": 0.9869909021341723, "grad_norm": 0.24318064749240875, "learning_rate": 9.42262868401933e-09, "loss": 0.743, "step": 14510 }, { "epoch": 0.9876711164016665, "grad_norm": 0.24455608427524567, "learning_rate": 8.471621124256368e-09, "loss": 0.7589, "step": 14520 }, { "epoch": 0.9883513306691608, "grad_norm": 0.24279429018497467, "learning_rate": 7.571171276662403e-09, "loss": 0.7716, "step": 14530 }, { "epoch": 0.989031544936655, "grad_norm": 0.24775396287441254, "learning_rate": 6.721283697566084e-09, "loss": 0.7686, "step": 14540 }, { "epoch": 0.9897117592041493, "grad_norm": 0.25869980454444885, "learning_rate": 5.92196268744849e-09, "loss": 0.7597, "step": 14550 }, { "epoch": 0.9903919734716435, "grad_norm": 0.2435072809457779, "learning_rate": 5.173212290919827e-09, "loss": 0.749, "step": 14560 }, { "epoch": 0.9910721877391379, "grad_norm": 0.24581976234912872, "learning_rate": 4.4750362967005415e-09, "loss": 0.7495, "step": 14570 }, { "epoch": 0.9917524020066321, "grad_norm": 0.241951122879982, "learning_rate": 3.827438237601344e-09, "loss": 0.757, "step": 14580 }, { "epoch": 0.9924326162741264, "grad_norm": 0.24407359957695007, "learning_rate": 3.230421390506555e-09, "loss": 0.7578, "step": 14590 }, { "epoch": 0.9931128305416206, "grad_norm": 0.24469362199306488, "learning_rate": 2.6839887763552287e-09, "loss": 0.7655, "step": 14600 }, { "epoch": 0.9937930448091149, "grad_norm": 0.23985865712165833, "learning_rate": 2.1881431601278315e-09, "loss": 0.7755, "step": 14610 }, { "epoch": 0.9944732590766091, "grad_norm": 0.2478683441877365, "learning_rate": 1.7428870508329199e-09, "loss": 0.7662, "step": 14620 }, { "epoch": 0.9951534733441034, "grad_norm": 0.24447156488895416, "learning_rate": 1.3482227014915973e-09, "loss": 0.7629, "step": 14630 }, { "epoch": 0.9958336876115976, "grad_norm": 0.24857555329799652, "learning_rate": 1.0041521091275209e-09, "loss": 0.7588, "step": 14640 }, { "epoch": 0.996513901879092, "grad_norm": 0.24451036751270294, "learning_rate": 7.106770147602415e-10, "loss": 0.7621, "step": 14650 }, { "epoch": 0.9971941161465862, "grad_norm": 0.2504979372024536, "learning_rate": 4.677989033874397e-10, "loss": 0.7528, "step": 14660 }, { "epoch": 0.9978743304140805, "grad_norm": 0.24821937084197998, "learning_rate": 2.7551900398936625e-10, "loss": 0.7599, "step": 14670 }, { "epoch": 0.9985545446815747, "grad_norm": 0.2521255612373352, "learning_rate": 1.3383828951218924e-10, "loss": 0.7507, "step": 14680 }, { "epoch": 0.999234758949069, "grad_norm": 0.2424987554550171, "learning_rate": 4.275747686799392e-11, "loss": 0.7597, "step": 14690 }, { "epoch": 0.9999149732165632, "grad_norm": 0.2516481280326843, "learning_rate": 2.277026931452042e-12, "loss": 0.7617, "step": 14700 } ], "logging_steps": 10, "max_steps": 14702, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.302340990111685e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }