{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9995788291450234, "eval_steps": 500, "global_step": 7122, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004211708549768356, "grad_norm": 4.967220783233643, "learning_rate": 1.4025245441795231e-08, "loss": 0.8344, "step": 1 }, { "epoch": 0.0008423417099536712, "grad_norm": 5.334061145782471, "learning_rate": 2.8050490883590463e-08, "loss": 0.8283, "step": 2 }, { "epoch": 0.0012635125649305067, "grad_norm": 5.236204624176025, "learning_rate": 4.2075736325385696e-08, "loss": 0.8749, "step": 3 }, { "epoch": 0.0016846834199073424, "grad_norm": 4.962848663330078, "learning_rate": 5.6100981767180926e-08, "loss": 0.8444, "step": 4 }, { "epoch": 0.002105854274884178, "grad_norm": 5.260888576507568, "learning_rate": 7.012622720897616e-08, "loss": 0.8134, "step": 5 }, { "epoch": 0.0025270251298610135, "grad_norm": 5.069085121154785, "learning_rate": 8.415147265077139e-08, "loss": 0.8354, "step": 6 }, { "epoch": 0.002948195984837849, "grad_norm": 5.073725700378418, "learning_rate": 9.817671809256662e-08, "loss": 0.8243, "step": 7 }, { "epoch": 0.0033693668398146847, "grad_norm": 5.138651371002197, "learning_rate": 1.1220196353436185e-07, "loss": 0.8529, "step": 8 }, { "epoch": 0.0037905376947915204, "grad_norm": 5.167470455169678, "learning_rate": 1.2622720897615708e-07, "loss": 0.8243, "step": 9 }, { "epoch": 0.004211708549768356, "grad_norm": 5.310710906982422, "learning_rate": 1.4025245441795232e-07, "loss": 0.8393, "step": 10 }, { "epoch": 0.004632879404745192, "grad_norm": 4.674258232116699, "learning_rate": 1.5427769985974757e-07, "loss": 0.788, "step": 11 }, { "epoch": 0.005054050259722027, "grad_norm": 5.221978187561035, "learning_rate": 1.6830294530154278e-07, "loss": 0.8174, "step": 12 }, { "epoch": 0.005475221114698863, "grad_norm": 5.053150177001953, "learning_rate": 1.8232819074333803e-07, "loss": 0.8061, "step": 13 }, { "epoch": 0.005896391969675698, "grad_norm": 4.927524566650391, "learning_rate": 1.9635343618513324e-07, "loss": 0.8119, "step": 14 }, { "epoch": 0.006317562824652534, "grad_norm": 4.9689249992370605, "learning_rate": 2.1037868162692849e-07, "loss": 0.8369, "step": 15 }, { "epoch": 0.0067387336796293695, "grad_norm": 4.7860798835754395, "learning_rate": 2.244039270687237e-07, "loss": 0.7945, "step": 16 }, { "epoch": 0.007159904534606206, "grad_norm": 4.837182998657227, "learning_rate": 2.3842917251051895e-07, "loss": 0.8093, "step": 17 }, { "epoch": 0.007581075389583041, "grad_norm": 4.924511909484863, "learning_rate": 2.5245441795231416e-07, "loss": 0.8468, "step": 18 }, { "epoch": 0.008002246244559877, "grad_norm": 4.385269641876221, "learning_rate": 2.6647966339410943e-07, "loss": 0.7758, "step": 19 }, { "epoch": 0.008423417099536711, "grad_norm": 4.772087574005127, "learning_rate": 2.8050490883590465e-07, "loss": 0.8414, "step": 20 }, { "epoch": 0.008844587954513547, "grad_norm": 4.347904205322266, "learning_rate": 2.9453015427769986e-07, "loss": 0.7925, "step": 21 }, { "epoch": 0.009265758809490383, "grad_norm": 4.650535583496094, "learning_rate": 3.0855539971949513e-07, "loss": 0.7927, "step": 22 }, { "epoch": 0.00968692966446722, "grad_norm": 4.299376964569092, "learning_rate": 3.2258064516129035e-07, "loss": 0.7963, "step": 23 }, { "epoch": 0.010108100519444054, "grad_norm": 4.408743381500244, "learning_rate": 3.3660589060308557e-07, "loss": 0.7664, "step": 24 }, { "epoch": 0.01052927137442089, "grad_norm": 4.31144380569458, "learning_rate": 3.506311360448808e-07, "loss": 0.752, "step": 25 }, { "epoch": 0.010950442229397726, "grad_norm": 3.7746188640594482, "learning_rate": 3.6465638148667605e-07, "loss": 0.76, "step": 26 }, { "epoch": 0.011371613084374562, "grad_norm": 3.9766016006469727, "learning_rate": 3.7868162692847127e-07, "loss": 0.8097, "step": 27 }, { "epoch": 0.011792783939351396, "grad_norm": 3.720290422439575, "learning_rate": 3.927068723702665e-07, "loss": 0.7669, "step": 28 }, { "epoch": 0.012213954794328232, "grad_norm": 3.9470908641815186, "learning_rate": 4.0673211781206176e-07, "loss": 0.8145, "step": 29 }, { "epoch": 0.012635125649305069, "grad_norm": 3.8003785610198975, "learning_rate": 4.2075736325385697e-07, "loss": 0.7769, "step": 30 }, { "epoch": 0.013056296504281903, "grad_norm": 3.9887502193450928, "learning_rate": 4.347826086956522e-07, "loss": 0.802, "step": 31 }, { "epoch": 0.013477467359258739, "grad_norm": 3.6415748596191406, "learning_rate": 4.488078541374474e-07, "loss": 0.7715, "step": 32 }, { "epoch": 0.013898638214235575, "grad_norm": 3.477673292160034, "learning_rate": 4.628330995792427e-07, "loss": 0.7308, "step": 33 }, { "epoch": 0.014319809069212411, "grad_norm": 3.5406494140625, "learning_rate": 4.768583450210379e-07, "loss": 0.7265, "step": 34 }, { "epoch": 0.014740979924189246, "grad_norm": 2.6460251808166504, "learning_rate": 4.908835904628331e-07, "loss": 0.7655, "step": 35 }, { "epoch": 0.015162150779166082, "grad_norm": 2.2205991744995117, "learning_rate": 5.049088359046283e-07, "loss": 0.7406, "step": 36 }, { "epoch": 0.015583321634142918, "grad_norm": 2.058171272277832, "learning_rate": 5.189340813464235e-07, "loss": 0.7149, "step": 37 }, { "epoch": 0.016004492489119754, "grad_norm": 2.0168025493621826, "learning_rate": 5.329593267882189e-07, "loss": 0.7635, "step": 38 }, { "epoch": 0.016425663344096588, "grad_norm": 2.0973944664001465, "learning_rate": 5.469845722300141e-07, "loss": 0.7611, "step": 39 }, { "epoch": 0.016846834199073422, "grad_norm": 1.95995032787323, "learning_rate": 5.610098176718093e-07, "loss": 0.7592, "step": 40 }, { "epoch": 0.01726800505405026, "grad_norm": 1.914425015449524, "learning_rate": 5.750350631136045e-07, "loss": 0.7421, "step": 41 }, { "epoch": 0.017689175909027095, "grad_norm": 1.892782211303711, "learning_rate": 5.890603085553997e-07, "loss": 0.7824, "step": 42 }, { "epoch": 0.018110346764003932, "grad_norm": 1.9045207500457764, "learning_rate": 6.030855539971949e-07, "loss": 0.771, "step": 43 }, { "epoch": 0.018531517618980767, "grad_norm": 1.7345590591430664, "learning_rate": 6.171107994389903e-07, "loss": 0.7253, "step": 44 }, { "epoch": 0.0189526884739576, "grad_norm": 1.658844232559204, "learning_rate": 6.311360448807855e-07, "loss": 0.7539, "step": 45 }, { "epoch": 0.01937385932893444, "grad_norm": 1.622214913368225, "learning_rate": 6.451612903225807e-07, "loss": 0.7576, "step": 46 }, { "epoch": 0.019795030183911273, "grad_norm": 1.4698289632797241, "learning_rate": 6.591865357643759e-07, "loss": 0.7551, "step": 47 }, { "epoch": 0.020216201038888108, "grad_norm": 1.381988525390625, "learning_rate": 6.732117812061711e-07, "loss": 0.7304, "step": 48 }, { "epoch": 0.020637371893864945, "grad_norm": 1.4191383123397827, "learning_rate": 6.872370266479664e-07, "loss": 0.7245, "step": 49 }, { "epoch": 0.02105854274884178, "grad_norm": 1.4603327512741089, "learning_rate": 7.012622720897616e-07, "loss": 0.6934, "step": 50 }, { "epoch": 0.021479713603818614, "grad_norm": 1.6544220447540283, "learning_rate": 7.152875175315569e-07, "loss": 0.6822, "step": 51 }, { "epoch": 0.021900884458795452, "grad_norm": 1.6188528537750244, "learning_rate": 7.293127629733521e-07, "loss": 0.6896, "step": 52 }, { "epoch": 0.022322055313772286, "grad_norm": 1.6348211765289307, "learning_rate": 7.433380084151473e-07, "loss": 0.6704, "step": 53 }, { "epoch": 0.022743226168749124, "grad_norm": 1.6582608222961426, "learning_rate": 7.573632538569425e-07, "loss": 0.7421, "step": 54 }, { "epoch": 0.02316439702372596, "grad_norm": 1.4734224081039429, "learning_rate": 7.713884992987378e-07, "loss": 0.672, "step": 55 }, { "epoch": 0.023585567878702793, "grad_norm": 1.483731746673584, "learning_rate": 7.85413744740533e-07, "loss": 0.6951, "step": 56 }, { "epoch": 0.02400673873367963, "grad_norm": 1.2693761587142944, "learning_rate": 7.994389901823282e-07, "loss": 0.653, "step": 57 }, { "epoch": 0.024427909588656465, "grad_norm": 1.150406002998352, "learning_rate": 8.134642356241235e-07, "loss": 0.6934, "step": 58 }, { "epoch": 0.0248490804436333, "grad_norm": 1.0852395296096802, "learning_rate": 8.274894810659187e-07, "loss": 0.753, "step": 59 }, { "epoch": 0.025270251298610137, "grad_norm": 0.978653609752655, "learning_rate": 8.415147265077139e-07, "loss": 0.6823, "step": 60 }, { "epoch": 0.02569142215358697, "grad_norm": 0.9217468500137329, "learning_rate": 8.555399719495092e-07, "loss": 0.6677, "step": 61 }, { "epoch": 0.026112593008563806, "grad_norm": 0.8968811631202698, "learning_rate": 8.695652173913044e-07, "loss": 0.6837, "step": 62 }, { "epoch": 0.026533763863540644, "grad_norm": 0.9098145365715027, "learning_rate": 8.835904628330996e-07, "loss": 0.6876, "step": 63 }, { "epoch": 0.026954934718517478, "grad_norm": 0.8886929750442505, "learning_rate": 8.976157082748948e-07, "loss": 0.6551, "step": 64 }, { "epoch": 0.027376105573494316, "grad_norm": 0.9709916114807129, "learning_rate": 9.116409537166901e-07, "loss": 0.6225, "step": 65 }, { "epoch": 0.02779727642847115, "grad_norm": 0.8641489744186401, "learning_rate": 9.256661991584853e-07, "loss": 0.6614, "step": 66 }, { "epoch": 0.028218447283447985, "grad_norm": 0.8124551177024841, "learning_rate": 9.396914446002806e-07, "loss": 0.6781, "step": 67 }, { "epoch": 0.028639618138424822, "grad_norm": 0.8619871735572815, "learning_rate": 9.537166900420758e-07, "loss": 0.6754, "step": 68 }, { "epoch": 0.029060788993401657, "grad_norm": 0.7940932512283325, "learning_rate": 9.67741935483871e-07, "loss": 0.6663, "step": 69 }, { "epoch": 0.02948195984837849, "grad_norm": 0.7499744296073914, "learning_rate": 9.817671809256662e-07, "loss": 0.6383, "step": 70 }, { "epoch": 0.02990313070335533, "grad_norm": 0.8306282162666321, "learning_rate": 9.957924263674614e-07, "loss": 0.5974, "step": 71 }, { "epoch": 0.030324301558332163, "grad_norm": 0.7683479189872742, "learning_rate": 1.0098176718092566e-06, "loss": 0.6668, "step": 72 }, { "epoch": 0.030745472413308998, "grad_norm": 0.8149742484092712, "learning_rate": 1.0238429172510519e-06, "loss": 0.6471, "step": 73 }, { "epoch": 0.031166643268285835, "grad_norm": 0.7280018329620361, "learning_rate": 1.037868162692847e-06, "loss": 0.6653, "step": 74 }, { "epoch": 0.03158781412326267, "grad_norm": 0.6832787990570068, "learning_rate": 1.0518934081346425e-06, "loss": 0.6181, "step": 75 }, { "epoch": 0.03200898497823951, "grad_norm": 0.6544724702835083, "learning_rate": 1.0659186535764377e-06, "loss": 0.6212, "step": 76 }, { "epoch": 0.03243015583321634, "grad_norm": 0.7515483498573303, "learning_rate": 1.079943899018233e-06, "loss": 0.6167, "step": 77 }, { "epoch": 0.032851326688193176, "grad_norm": 0.7381707429885864, "learning_rate": 1.0939691444600282e-06, "loss": 0.6587, "step": 78 }, { "epoch": 0.033272497543170014, "grad_norm": 0.6488091945648193, "learning_rate": 1.1079943899018234e-06, "loss": 0.6534, "step": 79 }, { "epoch": 0.033693668398146845, "grad_norm": 0.7132005095481873, "learning_rate": 1.1220196353436186e-06, "loss": 0.6, "step": 80 }, { "epoch": 0.03411483925312368, "grad_norm": 0.6447651982307434, "learning_rate": 1.1360448807854138e-06, "loss": 0.6191, "step": 81 }, { "epoch": 0.03453601010810052, "grad_norm": 0.7397660613059998, "learning_rate": 1.150070126227209e-06, "loss": 0.6718, "step": 82 }, { "epoch": 0.03495718096307736, "grad_norm": 0.669383704662323, "learning_rate": 1.1640953716690042e-06, "loss": 0.6069, "step": 83 }, { "epoch": 0.03537835181805419, "grad_norm": 0.6563029289245605, "learning_rate": 1.1781206171107995e-06, "loss": 0.6284, "step": 84 }, { "epoch": 0.03579952267303103, "grad_norm": 0.6697090864181519, "learning_rate": 1.1921458625525947e-06, "loss": 0.6169, "step": 85 }, { "epoch": 0.036220693528007865, "grad_norm": 0.6525166034698486, "learning_rate": 1.2061711079943899e-06, "loss": 0.6286, "step": 86 }, { "epoch": 0.036641864382984696, "grad_norm": 0.6043281555175781, "learning_rate": 1.2201963534361851e-06, "loss": 0.6042, "step": 87 }, { "epoch": 0.037063035237961534, "grad_norm": 0.616651177406311, "learning_rate": 1.2342215988779805e-06, "loss": 0.5786, "step": 88 }, { "epoch": 0.03748420609293837, "grad_norm": 0.6054384112358093, "learning_rate": 1.2482468443197758e-06, "loss": 0.6326, "step": 89 }, { "epoch": 0.0379053769479152, "grad_norm": 0.6263764500617981, "learning_rate": 1.262272089761571e-06, "loss": 0.605, "step": 90 }, { "epoch": 0.03832654780289204, "grad_norm": 0.6078124046325684, "learning_rate": 1.2762973352033662e-06, "loss": 0.6201, "step": 91 }, { "epoch": 0.03874771865786888, "grad_norm": 0.5822167992591858, "learning_rate": 1.2903225806451614e-06, "loss": 0.6337, "step": 92 }, { "epoch": 0.03916888951284571, "grad_norm": 0.5731845498085022, "learning_rate": 1.3043478260869566e-06, "loss": 0.5959, "step": 93 }, { "epoch": 0.03959006036782255, "grad_norm": 0.6212044358253479, "learning_rate": 1.3183730715287518e-06, "loss": 0.6339, "step": 94 }, { "epoch": 0.040011231222799384, "grad_norm": 0.4984055459499359, "learning_rate": 1.332398316970547e-06, "loss": 0.5689, "step": 95 }, { "epoch": 0.040432402077776215, "grad_norm": 0.6381989121437073, "learning_rate": 1.3464235624123423e-06, "loss": 0.6048, "step": 96 }, { "epoch": 0.04085357293275305, "grad_norm": 0.7002402544021606, "learning_rate": 1.3604488078541375e-06, "loss": 0.6191, "step": 97 }, { "epoch": 0.04127474378772989, "grad_norm": 0.5686971545219421, "learning_rate": 1.3744740532959327e-06, "loss": 0.6353, "step": 98 }, { "epoch": 0.04169591464270672, "grad_norm": 0.6315195560455322, "learning_rate": 1.388499298737728e-06, "loss": 0.588, "step": 99 }, { "epoch": 0.04211708549768356, "grad_norm": 0.6590785980224609, "learning_rate": 1.4025245441795231e-06, "loss": 0.5926, "step": 100 }, { "epoch": 0.0425382563526604, "grad_norm": 0.5607002377510071, "learning_rate": 1.4165497896213184e-06, "loss": 0.5974, "step": 101 }, { "epoch": 0.04295942720763723, "grad_norm": 0.6247360110282898, "learning_rate": 1.4305750350631138e-06, "loss": 0.6128, "step": 102 }, { "epoch": 0.043380598062614066, "grad_norm": 0.5741599202156067, "learning_rate": 1.444600280504909e-06, "loss": 0.5635, "step": 103 }, { "epoch": 0.043801768917590904, "grad_norm": 0.5754412412643433, "learning_rate": 1.4586255259467042e-06, "loss": 0.5941, "step": 104 }, { "epoch": 0.04422293977256774, "grad_norm": 0.5413808822631836, "learning_rate": 1.4726507713884994e-06, "loss": 0.5908, "step": 105 }, { "epoch": 0.04464411062754457, "grad_norm": 0.5614799857139587, "learning_rate": 1.4866760168302946e-06, "loss": 0.5789, "step": 106 }, { "epoch": 0.04506528148252141, "grad_norm": 0.5368629693984985, "learning_rate": 1.5007012622720899e-06, "loss": 0.5831, "step": 107 }, { "epoch": 0.04548645233749825, "grad_norm": 0.6294327974319458, "learning_rate": 1.514726507713885e-06, "loss": 0.5961, "step": 108 }, { "epoch": 0.04590762319247508, "grad_norm": 0.5866228938102722, "learning_rate": 1.5287517531556803e-06, "loss": 0.5923, "step": 109 }, { "epoch": 0.04632879404745192, "grad_norm": 0.6204866170883179, "learning_rate": 1.5427769985974755e-06, "loss": 0.6197, "step": 110 }, { "epoch": 0.046749964902428755, "grad_norm": 0.6642231941223145, "learning_rate": 1.5568022440392707e-06, "loss": 0.6146, "step": 111 }, { "epoch": 0.047171135757405586, "grad_norm": 0.5487833023071289, "learning_rate": 1.570827489481066e-06, "loss": 0.5954, "step": 112 }, { "epoch": 0.04759230661238242, "grad_norm": 0.5657860040664673, "learning_rate": 1.5848527349228612e-06, "loss": 0.6066, "step": 113 }, { "epoch": 0.04801347746735926, "grad_norm": 0.5952433943748474, "learning_rate": 1.5988779803646564e-06, "loss": 0.5902, "step": 114 }, { "epoch": 0.04843464832233609, "grad_norm": 0.5618955492973328, "learning_rate": 1.6129032258064516e-06, "loss": 0.611, "step": 115 }, { "epoch": 0.04885581917731293, "grad_norm": 0.520534336566925, "learning_rate": 1.626928471248247e-06, "loss": 0.6132, "step": 116 }, { "epoch": 0.04927699003228977, "grad_norm": 0.6123011112213135, "learning_rate": 1.6409537166900422e-06, "loss": 0.5821, "step": 117 }, { "epoch": 0.0496981608872666, "grad_norm": 0.5911686420440674, "learning_rate": 1.6549789621318375e-06, "loss": 0.5841, "step": 118 }, { "epoch": 0.050119331742243436, "grad_norm": 0.5573382377624512, "learning_rate": 1.6690042075736327e-06, "loss": 0.5952, "step": 119 }, { "epoch": 0.050540502597220274, "grad_norm": 0.6292282342910767, "learning_rate": 1.6830294530154279e-06, "loss": 0.5934, "step": 120 }, { "epoch": 0.050961673452197105, "grad_norm": 0.5239071846008301, "learning_rate": 1.697054698457223e-06, "loss": 0.5834, "step": 121 }, { "epoch": 0.05138284430717394, "grad_norm": 0.6028488874435425, "learning_rate": 1.7110799438990183e-06, "loss": 0.6105, "step": 122 }, { "epoch": 0.05180401516215078, "grad_norm": 0.6065652370452881, "learning_rate": 1.7251051893408135e-06, "loss": 0.5604, "step": 123 }, { "epoch": 0.05222518601712761, "grad_norm": 0.5351958274841309, "learning_rate": 1.7391304347826088e-06, "loss": 0.6003, "step": 124 }, { "epoch": 0.05264635687210445, "grad_norm": 0.6278284192085266, "learning_rate": 1.753155680224404e-06, "loss": 0.5832, "step": 125 }, { "epoch": 0.05306752772708129, "grad_norm": 0.5678454041481018, "learning_rate": 1.7671809256661992e-06, "loss": 0.5792, "step": 126 }, { "epoch": 0.05348869858205812, "grad_norm": 0.6130309700965881, "learning_rate": 1.7812061711079944e-06, "loss": 0.5645, "step": 127 }, { "epoch": 0.053909869437034956, "grad_norm": 0.6493215560913086, "learning_rate": 1.7952314165497896e-06, "loss": 0.5677, "step": 128 }, { "epoch": 0.054331040292011794, "grad_norm": 0.5458422303199768, "learning_rate": 1.809256661991585e-06, "loss": 0.5858, "step": 129 }, { "epoch": 0.05475221114698863, "grad_norm": 0.542142927646637, "learning_rate": 1.8232819074333803e-06, "loss": 0.5924, "step": 130 }, { "epoch": 0.05517338200196546, "grad_norm": 0.5885812640190125, "learning_rate": 1.8373071528751755e-06, "loss": 0.6257, "step": 131 }, { "epoch": 0.0555945528569423, "grad_norm": 0.5516620874404907, "learning_rate": 1.8513323983169707e-06, "loss": 0.5971, "step": 132 }, { "epoch": 0.05601572371191914, "grad_norm": 0.6224496364593506, "learning_rate": 1.865357643758766e-06, "loss": 0.5989, "step": 133 }, { "epoch": 0.05643689456689597, "grad_norm": 0.6465047597885132, "learning_rate": 1.8793828892005611e-06, "loss": 0.6148, "step": 134 }, { "epoch": 0.05685806542187281, "grad_norm": 0.5349046587944031, "learning_rate": 1.8934081346423563e-06, "loss": 0.5714, "step": 135 }, { "epoch": 0.057279236276849645, "grad_norm": 0.5290235877037048, "learning_rate": 1.9074333800841516e-06, "loss": 0.5682, "step": 136 }, { "epoch": 0.057700407131826476, "grad_norm": 0.5750214457511902, "learning_rate": 1.9214586255259468e-06, "loss": 0.5247, "step": 137 }, { "epoch": 0.05812157798680331, "grad_norm": 0.6091398596763611, "learning_rate": 1.935483870967742e-06, "loss": 0.6191, "step": 138 }, { "epoch": 0.05854274884178015, "grad_norm": 0.6190043091773987, "learning_rate": 1.9495091164095372e-06, "loss": 0.6099, "step": 139 }, { "epoch": 0.05896391969675698, "grad_norm": 0.6020250916481018, "learning_rate": 1.9635343618513324e-06, "loss": 0.5767, "step": 140 }, { "epoch": 0.05938509055173382, "grad_norm": 0.5920841097831726, "learning_rate": 1.9775596072931276e-06, "loss": 0.5949, "step": 141 }, { "epoch": 0.05980626140671066, "grad_norm": 0.5506113171577454, "learning_rate": 1.991584852734923e-06, "loss": 0.5162, "step": 142 }, { "epoch": 0.06022743226168749, "grad_norm": 0.5210670232772827, "learning_rate": 2.005610098176718e-06, "loss": 0.5384, "step": 143 }, { "epoch": 0.060648603116664326, "grad_norm": 0.6619000434875488, "learning_rate": 2.0196353436185133e-06, "loss": 0.5945, "step": 144 }, { "epoch": 0.061069773971641164, "grad_norm": 0.6046743988990784, "learning_rate": 2.0336605890603085e-06, "loss": 0.6181, "step": 145 }, { "epoch": 0.061490944826617995, "grad_norm": 0.5459643006324768, "learning_rate": 2.0476858345021037e-06, "loss": 0.5598, "step": 146 }, { "epoch": 0.06191211568159483, "grad_norm": 0.605736494064331, "learning_rate": 2.061711079943899e-06, "loss": 0.5809, "step": 147 }, { "epoch": 0.06233328653657167, "grad_norm": 0.5539777874946594, "learning_rate": 2.075736325385694e-06, "loss": 0.6291, "step": 148 }, { "epoch": 0.0627544573915485, "grad_norm": 0.6239675879478455, "learning_rate": 2.08976157082749e-06, "loss": 0.5717, "step": 149 }, { "epoch": 0.06317562824652534, "grad_norm": 0.608752965927124, "learning_rate": 2.103786816269285e-06, "loss": 0.5714, "step": 150 }, { "epoch": 0.06359679910150218, "grad_norm": 0.6453249454498291, "learning_rate": 2.1178120617110802e-06, "loss": 0.5887, "step": 151 }, { "epoch": 0.06401796995647902, "grad_norm": 0.5232036709785461, "learning_rate": 2.1318373071528755e-06, "loss": 0.564, "step": 152 }, { "epoch": 0.06443914081145585, "grad_norm": 0.5497357249259949, "learning_rate": 2.1458625525946707e-06, "loss": 0.5639, "step": 153 }, { "epoch": 0.06486031166643268, "grad_norm": 0.6084002256393433, "learning_rate": 2.159887798036466e-06, "loss": 0.5882, "step": 154 }, { "epoch": 0.06528148252140951, "grad_norm": 0.5970067381858826, "learning_rate": 2.173913043478261e-06, "loss": 0.6064, "step": 155 }, { "epoch": 0.06570265337638635, "grad_norm": 0.5404170751571655, "learning_rate": 2.1879382889200563e-06, "loss": 0.5595, "step": 156 }, { "epoch": 0.06612382423136319, "grad_norm": 0.6438302993774414, "learning_rate": 2.2019635343618515e-06, "loss": 0.5596, "step": 157 }, { "epoch": 0.06654499508634003, "grad_norm": 0.6056437492370605, "learning_rate": 2.2159887798036468e-06, "loss": 0.5461, "step": 158 }, { "epoch": 0.06696616594131687, "grad_norm": 0.5973242521286011, "learning_rate": 2.230014025245442e-06, "loss": 0.59, "step": 159 }, { "epoch": 0.06738733679629369, "grad_norm": 0.6734510660171509, "learning_rate": 2.244039270687237e-06, "loss": 0.5904, "step": 160 }, { "epoch": 0.06780850765127053, "grad_norm": 0.649156391620636, "learning_rate": 2.2580645161290324e-06, "loss": 0.5843, "step": 161 }, { "epoch": 0.06822967850624737, "grad_norm": 0.6600956320762634, "learning_rate": 2.2720897615708276e-06, "loss": 0.6112, "step": 162 }, { "epoch": 0.0686508493612242, "grad_norm": 0.6079633235931396, "learning_rate": 2.286115007012623e-06, "loss": 0.5907, "step": 163 }, { "epoch": 0.06907202021620104, "grad_norm": 0.5583291053771973, "learning_rate": 2.300140252454418e-06, "loss": 0.5497, "step": 164 }, { "epoch": 0.06949319107117788, "grad_norm": 0.546212911605835, "learning_rate": 2.3141654978962133e-06, "loss": 0.5441, "step": 165 }, { "epoch": 0.06991436192615472, "grad_norm": 0.663681149482727, "learning_rate": 2.3281907433380085e-06, "loss": 0.6203, "step": 166 }, { "epoch": 0.07033553278113154, "grad_norm": 0.5780558586120605, "learning_rate": 2.3422159887798037e-06, "loss": 0.523, "step": 167 }, { "epoch": 0.07075670363610838, "grad_norm": 0.5863725543022156, "learning_rate": 2.356241234221599e-06, "loss": 0.5824, "step": 168 }, { "epoch": 0.07117787449108522, "grad_norm": 0.5837494134902954, "learning_rate": 2.370266479663394e-06, "loss": 0.5483, "step": 169 }, { "epoch": 0.07159904534606205, "grad_norm": 0.6258354783058167, "learning_rate": 2.3842917251051894e-06, "loss": 0.5594, "step": 170 }, { "epoch": 0.07202021620103889, "grad_norm": 0.6249247193336487, "learning_rate": 2.3983169705469846e-06, "loss": 0.5629, "step": 171 }, { "epoch": 0.07244138705601573, "grad_norm": 0.6225827932357788, "learning_rate": 2.4123422159887798e-06, "loss": 0.5454, "step": 172 }, { "epoch": 0.07286255791099255, "grad_norm": 0.6631636023521423, "learning_rate": 2.426367461430575e-06, "loss": 0.5934, "step": 173 }, { "epoch": 0.07328372876596939, "grad_norm": 0.6944399476051331, "learning_rate": 2.4403927068723702e-06, "loss": 0.5789, "step": 174 }, { "epoch": 0.07370489962094623, "grad_norm": 0.5462700724601746, "learning_rate": 2.4544179523141654e-06, "loss": 0.5621, "step": 175 }, { "epoch": 0.07412607047592307, "grad_norm": 0.5702570080757141, "learning_rate": 2.468443197755961e-06, "loss": 0.5711, "step": 176 }, { "epoch": 0.0745472413308999, "grad_norm": 0.5842878818511963, "learning_rate": 2.4824684431977563e-06, "loss": 0.587, "step": 177 }, { "epoch": 0.07496841218587674, "grad_norm": 0.6160016059875488, "learning_rate": 2.4964936886395515e-06, "loss": 0.5741, "step": 178 }, { "epoch": 0.07538958304085357, "grad_norm": 0.5883713364601135, "learning_rate": 2.5105189340813467e-06, "loss": 0.5285, "step": 179 }, { "epoch": 0.0758107538958304, "grad_norm": 0.5826126337051392, "learning_rate": 2.524544179523142e-06, "loss": 0.6022, "step": 180 }, { "epoch": 0.07623192475080724, "grad_norm": 0.5526702404022217, "learning_rate": 2.538569424964937e-06, "loss": 0.5303, "step": 181 }, { "epoch": 0.07665309560578408, "grad_norm": 0.5422945022583008, "learning_rate": 2.5525946704067324e-06, "loss": 0.5535, "step": 182 }, { "epoch": 0.07707426646076092, "grad_norm": 0.5104424357414246, "learning_rate": 2.5666199158485276e-06, "loss": 0.5358, "step": 183 }, { "epoch": 0.07749543731573776, "grad_norm": 0.5650685429573059, "learning_rate": 2.580645161290323e-06, "loss": 0.548, "step": 184 }, { "epoch": 0.07791660817071458, "grad_norm": 0.6644390225410461, "learning_rate": 2.594670406732118e-06, "loss": 0.5762, "step": 185 }, { "epoch": 0.07833777902569142, "grad_norm": 0.5428359508514404, "learning_rate": 2.6086956521739132e-06, "loss": 0.5351, "step": 186 }, { "epoch": 0.07875894988066826, "grad_norm": 0.6112319827079773, "learning_rate": 2.622720897615709e-06, "loss": 0.549, "step": 187 }, { "epoch": 0.0791801207356451, "grad_norm": 0.6554045081138611, "learning_rate": 2.6367461430575037e-06, "loss": 0.614, "step": 188 }, { "epoch": 0.07960129159062193, "grad_norm": 0.6421487331390381, "learning_rate": 2.6507713884992993e-06, "loss": 0.5606, "step": 189 }, { "epoch": 0.08002246244559877, "grad_norm": 0.5740941762924194, "learning_rate": 2.664796633941094e-06, "loss": 0.556, "step": 190 }, { "epoch": 0.0804436333005756, "grad_norm": 0.5870221853256226, "learning_rate": 2.6788218793828897e-06, "loss": 0.5462, "step": 191 }, { "epoch": 0.08086480415555243, "grad_norm": 0.5760233998298645, "learning_rate": 2.6928471248246845e-06, "loss": 0.5789, "step": 192 }, { "epoch": 0.08128597501052927, "grad_norm": 0.5866135358810425, "learning_rate": 2.70687237026648e-06, "loss": 0.5477, "step": 193 }, { "epoch": 0.0817071458655061, "grad_norm": 0.5905698537826538, "learning_rate": 2.720897615708275e-06, "loss": 0.5907, "step": 194 }, { "epoch": 0.08212831672048294, "grad_norm": 0.5959790945053101, "learning_rate": 2.7349228611500706e-06, "loss": 0.5629, "step": 195 }, { "epoch": 0.08254948757545978, "grad_norm": 0.5589589476585388, "learning_rate": 2.7489481065918654e-06, "loss": 0.5662, "step": 196 }, { "epoch": 0.08297065843043662, "grad_norm": 0.5695422291755676, "learning_rate": 2.762973352033661e-06, "loss": 0.5609, "step": 197 }, { "epoch": 0.08339182928541344, "grad_norm": 0.5732924342155457, "learning_rate": 2.776998597475456e-06, "loss": 0.5537, "step": 198 }, { "epoch": 0.08381300014039028, "grad_norm": 0.5380580425262451, "learning_rate": 2.7910238429172515e-06, "loss": 0.521, "step": 199 }, { "epoch": 0.08423417099536712, "grad_norm": 0.6387904286384583, "learning_rate": 2.8050490883590463e-06, "loss": 0.6048, "step": 200 }, { "epoch": 0.08465534185034396, "grad_norm": 0.5489081144332886, "learning_rate": 2.819074333800842e-06, "loss": 0.5131, "step": 201 }, { "epoch": 0.0850765127053208, "grad_norm": 0.538929283618927, "learning_rate": 2.8330995792426367e-06, "loss": 0.5451, "step": 202 }, { "epoch": 0.08549768356029763, "grad_norm": 0.628017008304596, "learning_rate": 2.8471248246844323e-06, "loss": 0.5678, "step": 203 }, { "epoch": 0.08591885441527446, "grad_norm": 0.616239607334137, "learning_rate": 2.8611500701262276e-06, "loss": 0.5172, "step": 204 }, { "epoch": 0.0863400252702513, "grad_norm": 0.6761292219161987, "learning_rate": 2.8751753155680228e-06, "loss": 0.549, "step": 205 }, { "epoch": 0.08676119612522813, "grad_norm": 0.5523682236671448, "learning_rate": 2.889200561009818e-06, "loss": 0.5034, "step": 206 }, { "epoch": 0.08718236698020497, "grad_norm": 0.5737572908401489, "learning_rate": 2.903225806451613e-06, "loss": 0.614, "step": 207 }, { "epoch": 0.08760353783518181, "grad_norm": 0.5924317240715027, "learning_rate": 2.9172510518934084e-06, "loss": 0.5216, "step": 208 }, { "epoch": 0.08802470869015865, "grad_norm": 0.570369303226471, "learning_rate": 2.9312762973352036e-06, "loss": 0.5481, "step": 209 }, { "epoch": 0.08844587954513548, "grad_norm": 0.520557165145874, "learning_rate": 2.945301542776999e-06, "loss": 0.5451, "step": 210 }, { "epoch": 0.08886705040011231, "grad_norm": 0.5321965217590332, "learning_rate": 2.959326788218794e-06, "loss": 0.5216, "step": 211 }, { "epoch": 0.08928822125508915, "grad_norm": 0.5553423762321472, "learning_rate": 2.9733520336605893e-06, "loss": 0.579, "step": 212 }, { "epoch": 0.08970939211006598, "grad_norm": 0.5517761707305908, "learning_rate": 2.987377279102385e-06, "loss": 0.5358, "step": 213 }, { "epoch": 0.09013056296504282, "grad_norm": 0.563995897769928, "learning_rate": 3.0014025245441797e-06, "loss": 0.5398, "step": 214 }, { "epoch": 0.09055173382001966, "grad_norm": 0.6098183989524841, "learning_rate": 3.0154277699859754e-06, "loss": 0.62, "step": 215 }, { "epoch": 0.0909729046749965, "grad_norm": 0.6418140530586243, "learning_rate": 3.02945301542777e-06, "loss": 0.527, "step": 216 }, { "epoch": 0.09139407552997332, "grad_norm": 0.6254906058311462, "learning_rate": 3.043478260869566e-06, "loss": 0.6154, "step": 217 }, { "epoch": 0.09181524638495016, "grad_norm": 0.5820286273956299, "learning_rate": 3.0575035063113606e-06, "loss": 0.5094, "step": 218 }, { "epoch": 0.092236417239927, "grad_norm": 0.6237866878509521, "learning_rate": 3.0715287517531562e-06, "loss": 0.5375, "step": 219 }, { "epoch": 0.09265758809490383, "grad_norm": 0.6474324464797974, "learning_rate": 3.085553997194951e-06, "loss": 0.5549, "step": 220 }, { "epoch": 0.09307875894988067, "grad_norm": 0.5439502596855164, "learning_rate": 3.0995792426367467e-06, "loss": 0.5066, "step": 221 }, { "epoch": 0.09349992980485751, "grad_norm": 0.5525719523429871, "learning_rate": 3.1136044880785415e-06, "loss": 0.5302, "step": 222 }, { "epoch": 0.09392110065983433, "grad_norm": 0.539968729019165, "learning_rate": 3.127629733520337e-06, "loss": 0.564, "step": 223 }, { "epoch": 0.09434227151481117, "grad_norm": 0.5756800174713135, "learning_rate": 3.141654978962132e-06, "loss": 0.5686, "step": 224 }, { "epoch": 0.09476344236978801, "grad_norm": 0.5688039064407349, "learning_rate": 3.1556802244039275e-06, "loss": 0.5616, "step": 225 }, { "epoch": 0.09518461322476485, "grad_norm": 0.5455812811851501, "learning_rate": 3.1697054698457223e-06, "loss": 0.5302, "step": 226 }, { "epoch": 0.09560578407974168, "grad_norm": 0.6285903453826904, "learning_rate": 3.183730715287518e-06, "loss": 0.5821, "step": 227 }, { "epoch": 0.09602695493471852, "grad_norm": 0.704421877861023, "learning_rate": 3.1977559607293128e-06, "loss": 0.6015, "step": 228 }, { "epoch": 0.09644812578969535, "grad_norm": 0.5863338112831116, "learning_rate": 3.2117812061711084e-06, "loss": 0.524, "step": 229 }, { "epoch": 0.09686929664467218, "grad_norm": 0.62582927942276, "learning_rate": 3.225806451612903e-06, "loss": 0.5415, "step": 230 }, { "epoch": 0.09729046749964902, "grad_norm": 0.6050381064414978, "learning_rate": 3.239831697054699e-06, "loss": 0.5392, "step": 231 }, { "epoch": 0.09771163835462586, "grad_norm": 0.5865756273269653, "learning_rate": 3.253856942496494e-06, "loss": 0.5408, "step": 232 }, { "epoch": 0.0981328092096027, "grad_norm": 0.5667901039123535, "learning_rate": 3.2678821879382893e-06, "loss": 0.5387, "step": 233 }, { "epoch": 0.09855398006457954, "grad_norm": 0.5694307684898376, "learning_rate": 3.2819074333800845e-06, "loss": 0.5278, "step": 234 }, { "epoch": 0.09897515091955637, "grad_norm": 0.620830237865448, "learning_rate": 3.2959326788218797e-06, "loss": 0.5389, "step": 235 }, { "epoch": 0.0993963217745332, "grad_norm": 0.6208356022834778, "learning_rate": 3.309957924263675e-06, "loss": 0.6016, "step": 236 }, { "epoch": 0.09981749262951004, "grad_norm": 0.5698862671852112, "learning_rate": 3.32398316970547e-06, "loss": 0.5422, "step": 237 }, { "epoch": 0.10023866348448687, "grad_norm": 0.5780800580978394, "learning_rate": 3.3380084151472653e-06, "loss": 0.5525, "step": 238 }, { "epoch": 0.10065983433946371, "grad_norm": 0.5857884287834167, "learning_rate": 3.3520336605890606e-06, "loss": 0.5672, "step": 239 }, { "epoch": 0.10108100519444055, "grad_norm": 0.5506059527397156, "learning_rate": 3.3660589060308558e-06, "loss": 0.5295, "step": 240 }, { "epoch": 0.10150217604941739, "grad_norm": 0.5589848756790161, "learning_rate": 3.3800841514726514e-06, "loss": 0.5358, "step": 241 }, { "epoch": 0.10192334690439421, "grad_norm": 0.5889762043952942, "learning_rate": 3.394109396914446e-06, "loss": 0.5541, "step": 242 }, { "epoch": 0.10234451775937105, "grad_norm": 0.5335559844970703, "learning_rate": 3.408134642356242e-06, "loss": 0.5177, "step": 243 }, { "epoch": 0.10276568861434789, "grad_norm": 0.5615264177322388, "learning_rate": 3.4221598877980366e-06, "loss": 0.5364, "step": 244 }, { "epoch": 0.10318685946932472, "grad_norm": 0.5518187880516052, "learning_rate": 3.4361851332398323e-06, "loss": 0.5519, "step": 245 }, { "epoch": 0.10360803032430156, "grad_norm": 0.5958520770072937, "learning_rate": 3.450210378681627e-06, "loss": 0.5231, "step": 246 }, { "epoch": 0.1040292011792784, "grad_norm": 0.5683255195617676, "learning_rate": 3.4642356241234227e-06, "loss": 0.5714, "step": 247 }, { "epoch": 0.10445037203425522, "grad_norm": 0.5625893473625183, "learning_rate": 3.4782608695652175e-06, "loss": 0.5474, "step": 248 }, { "epoch": 0.10487154288923206, "grad_norm": 0.6234820485115051, "learning_rate": 3.492286115007013e-06, "loss": 0.5797, "step": 249 }, { "epoch": 0.1052927137442089, "grad_norm": 0.6023511290550232, "learning_rate": 3.506311360448808e-06, "loss": 0.5592, "step": 250 }, { "epoch": 0.10571388459918574, "grad_norm": 0.5659871101379395, "learning_rate": 3.5203366058906036e-06, "loss": 0.5361, "step": 251 }, { "epoch": 0.10613505545416257, "grad_norm": 0.6142107844352722, "learning_rate": 3.5343618513323984e-06, "loss": 0.5653, "step": 252 }, { "epoch": 0.10655622630913941, "grad_norm": 0.6485642194747925, "learning_rate": 3.548387096774194e-06, "loss": 0.5622, "step": 253 }, { "epoch": 0.10697739716411624, "grad_norm": 0.5409626364707947, "learning_rate": 3.562412342215989e-06, "loss": 0.5568, "step": 254 }, { "epoch": 0.10739856801909307, "grad_norm": 0.5589439868927002, "learning_rate": 3.5764375876577844e-06, "loss": 0.5137, "step": 255 }, { "epoch": 0.10781973887406991, "grad_norm": 0.5383075475692749, "learning_rate": 3.5904628330995792e-06, "loss": 0.5273, "step": 256 }, { "epoch": 0.10824090972904675, "grad_norm": 0.5935497879981995, "learning_rate": 3.604488078541375e-06, "loss": 0.5466, "step": 257 }, { "epoch": 0.10866208058402359, "grad_norm": 0.6107301712036133, "learning_rate": 3.61851332398317e-06, "loss": 0.5143, "step": 258 }, { "epoch": 0.10908325143900043, "grad_norm": 0.5637887716293335, "learning_rate": 3.6325385694249653e-06, "loss": 0.5742, "step": 259 }, { "epoch": 0.10950442229397726, "grad_norm": 0.6438877582550049, "learning_rate": 3.6465638148667605e-06, "loss": 0.5511, "step": 260 }, { "epoch": 0.10992559314895409, "grad_norm": 0.6031904220581055, "learning_rate": 3.6605890603085557e-06, "loss": 0.5293, "step": 261 }, { "epoch": 0.11034676400393092, "grad_norm": 0.6586031317710876, "learning_rate": 3.674614305750351e-06, "loss": 0.568, "step": 262 }, { "epoch": 0.11076793485890776, "grad_norm": 0.5576374530792236, "learning_rate": 3.688639551192146e-06, "loss": 0.5368, "step": 263 }, { "epoch": 0.1111891057138846, "grad_norm": 0.5326905250549316, "learning_rate": 3.7026647966339414e-06, "loss": 0.5121, "step": 264 }, { "epoch": 0.11161027656886144, "grad_norm": 0.5554443597793579, "learning_rate": 3.7166900420757366e-06, "loss": 0.5856, "step": 265 }, { "epoch": 0.11203144742383828, "grad_norm": 0.5672991871833801, "learning_rate": 3.730715287517532e-06, "loss": 0.555, "step": 266 }, { "epoch": 0.1124526182788151, "grad_norm": 0.5691398978233337, "learning_rate": 3.7447405329593275e-06, "loss": 0.5407, "step": 267 }, { "epoch": 0.11287378913379194, "grad_norm": 0.5297806859016418, "learning_rate": 3.7587657784011223e-06, "loss": 0.5318, "step": 268 }, { "epoch": 0.11329495998876878, "grad_norm": 0.5532379746437073, "learning_rate": 3.772791023842918e-06, "loss": 0.544, "step": 269 }, { "epoch": 0.11371613084374561, "grad_norm": 0.5643096566200256, "learning_rate": 3.7868162692847127e-06, "loss": 0.5334, "step": 270 }, { "epoch": 0.11413730169872245, "grad_norm": 0.6074506044387817, "learning_rate": 3.8008415147265083e-06, "loss": 0.5635, "step": 271 }, { "epoch": 0.11455847255369929, "grad_norm": 0.6324126124382019, "learning_rate": 3.814866760168303e-06, "loss": 0.5112, "step": 272 }, { "epoch": 0.11497964340867611, "grad_norm": 0.517156720161438, "learning_rate": 3.828892005610099e-06, "loss": 0.5135, "step": 273 }, { "epoch": 0.11540081426365295, "grad_norm": 0.5071574449539185, "learning_rate": 3.8429172510518936e-06, "loss": 0.508, "step": 274 }, { "epoch": 0.11582198511862979, "grad_norm": 0.6217570304870605, "learning_rate": 3.856942496493689e-06, "loss": 0.5389, "step": 275 }, { "epoch": 0.11624315597360663, "grad_norm": 0.5407923460006714, "learning_rate": 3.870967741935484e-06, "loss": 0.5506, "step": 276 }, { "epoch": 0.11666432682858346, "grad_norm": 0.5973406434059143, "learning_rate": 3.88499298737728e-06, "loss": 0.5878, "step": 277 }, { "epoch": 0.1170854976835603, "grad_norm": 0.5637548565864563, "learning_rate": 3.8990182328190744e-06, "loss": 0.5103, "step": 278 }, { "epoch": 0.11750666853853714, "grad_norm": 0.5851830244064331, "learning_rate": 3.91304347826087e-06, "loss": 0.548, "step": 279 }, { "epoch": 0.11792783939351396, "grad_norm": 0.58241206407547, "learning_rate": 3.927068723702665e-06, "loss": 0.5686, "step": 280 }, { "epoch": 0.1183490102484908, "grad_norm": 0.5306934118270874, "learning_rate": 3.9410939691444605e-06, "loss": 0.5187, "step": 281 }, { "epoch": 0.11877018110346764, "grad_norm": 0.5542960166931152, "learning_rate": 3.955119214586255e-06, "loss": 0.5228, "step": 282 }, { "epoch": 0.11919135195844448, "grad_norm": 0.5167752504348755, "learning_rate": 3.969144460028051e-06, "loss": 0.5467, "step": 283 }, { "epoch": 0.11961252281342132, "grad_norm": 0.6105754375457764, "learning_rate": 3.983169705469846e-06, "loss": 0.5404, "step": 284 }, { "epoch": 0.12003369366839815, "grad_norm": 0.5512216687202454, "learning_rate": 3.997194950911641e-06, "loss": 0.6056, "step": 285 }, { "epoch": 0.12045486452337498, "grad_norm": 0.5348156094551086, "learning_rate": 4.011220196353436e-06, "loss": 0.4953, "step": 286 }, { "epoch": 0.12087603537835181, "grad_norm": 0.6238960027694702, "learning_rate": 4.025245441795232e-06, "loss": 0.5692, "step": 287 }, { "epoch": 0.12129720623332865, "grad_norm": 0.5966511964797974, "learning_rate": 4.039270687237027e-06, "loss": 0.515, "step": 288 }, { "epoch": 0.12171837708830549, "grad_norm": 0.5124213695526123, "learning_rate": 4.053295932678822e-06, "loss": 0.5324, "step": 289 }, { "epoch": 0.12213954794328233, "grad_norm": 0.570331335067749, "learning_rate": 4.067321178120617e-06, "loss": 0.5357, "step": 290 }, { "epoch": 0.12256071879825917, "grad_norm": 0.5817534327507019, "learning_rate": 4.081346423562413e-06, "loss": 0.5617, "step": 291 }, { "epoch": 0.12298188965323599, "grad_norm": 0.5455261468887329, "learning_rate": 4.0953716690042075e-06, "loss": 0.5661, "step": 292 }, { "epoch": 0.12340306050821283, "grad_norm": 0.6355792284011841, "learning_rate": 4.109396914446003e-06, "loss": 0.5899, "step": 293 }, { "epoch": 0.12382423136318967, "grad_norm": 0.5610126256942749, "learning_rate": 4.123422159887798e-06, "loss": 0.5216, "step": 294 }, { "epoch": 0.1242454022181665, "grad_norm": 0.6559709906578064, "learning_rate": 4.1374474053295935e-06, "loss": 0.5714, "step": 295 }, { "epoch": 0.12466657307314334, "grad_norm": 0.558094322681427, "learning_rate": 4.151472650771388e-06, "loss": 0.5048, "step": 296 }, { "epoch": 0.12508774392812017, "grad_norm": 0.590015709400177, "learning_rate": 4.165497896213184e-06, "loss": 0.5647, "step": 297 }, { "epoch": 0.125508914783097, "grad_norm": 0.5020599365234375, "learning_rate": 4.17952314165498e-06, "loss": 0.5214, "step": 298 }, { "epoch": 0.12593008563807384, "grad_norm": 0.5576549768447876, "learning_rate": 4.193548387096774e-06, "loss": 0.5238, "step": 299 }, { "epoch": 0.12635125649305068, "grad_norm": 0.5943588614463806, "learning_rate": 4.20757363253857e-06, "loss": 0.5345, "step": 300 }, { "epoch": 0.12677242734802752, "grad_norm": 0.6020628809928894, "learning_rate": 4.221598877980365e-06, "loss": 0.5547, "step": 301 }, { "epoch": 0.12719359820300435, "grad_norm": 0.47622644901275635, "learning_rate": 4.2356241234221605e-06, "loss": 0.4898, "step": 302 }, { "epoch": 0.1276147690579812, "grad_norm": 0.5206568837165833, "learning_rate": 4.249649368863955e-06, "loss": 0.5271, "step": 303 }, { "epoch": 0.12803593991295803, "grad_norm": 0.5596451759338379, "learning_rate": 4.263674614305751e-06, "loss": 0.5266, "step": 304 }, { "epoch": 0.12845711076793487, "grad_norm": 0.5662742853164673, "learning_rate": 4.277699859747546e-06, "loss": 0.5496, "step": 305 }, { "epoch": 0.1288782816229117, "grad_norm": 0.6275546550750732, "learning_rate": 4.291725105189341e-06, "loss": 0.5531, "step": 306 }, { "epoch": 0.12929945247788854, "grad_norm": 0.6425439715385437, "learning_rate": 4.305750350631136e-06, "loss": 0.5756, "step": 307 }, { "epoch": 0.12972062333286535, "grad_norm": 0.6223469972610474, "learning_rate": 4.319775596072932e-06, "loss": 0.5712, "step": 308 }, { "epoch": 0.1301417941878422, "grad_norm": 0.6282339096069336, "learning_rate": 4.333800841514727e-06, "loss": 0.5489, "step": 309 }, { "epoch": 0.13056296504281903, "grad_norm": 0.5506791472434998, "learning_rate": 4.347826086956522e-06, "loss": 0.523, "step": 310 }, { "epoch": 0.13098413589779587, "grad_norm": 0.545620858669281, "learning_rate": 4.361851332398318e-06, "loss": 0.562, "step": 311 }, { "epoch": 0.1314053067527727, "grad_norm": 0.5749505162239075, "learning_rate": 4.375876577840113e-06, "loss": 0.5616, "step": 312 }, { "epoch": 0.13182647760774954, "grad_norm": 0.6035079956054688, "learning_rate": 4.389901823281908e-06, "loss": 0.5682, "step": 313 }, { "epoch": 0.13224764846272638, "grad_norm": 0.6032407879829407, "learning_rate": 4.403927068723703e-06, "loss": 0.5422, "step": 314 }, { "epoch": 0.13266881931770322, "grad_norm": 0.5532835125923157, "learning_rate": 4.417952314165499e-06, "loss": 0.513, "step": 315 }, { "epoch": 0.13308999017268006, "grad_norm": 0.5708320140838623, "learning_rate": 4.4319775596072935e-06, "loss": 0.4978, "step": 316 }, { "epoch": 0.1335111610276569, "grad_norm": 0.49419543147087097, "learning_rate": 4.446002805049089e-06, "loss": 0.5286, "step": 317 }, { "epoch": 0.13393233188263373, "grad_norm": 0.6250544190406799, "learning_rate": 4.460028050490884e-06, "loss": 0.5907, "step": 318 }, { "epoch": 0.13435350273761057, "grad_norm": 0.5819352865219116, "learning_rate": 4.47405329593268e-06, "loss": 0.5774, "step": 319 }, { "epoch": 0.13477467359258738, "grad_norm": 0.5397717952728271, "learning_rate": 4.488078541374474e-06, "loss": 0.4965, "step": 320 }, { "epoch": 0.13519584444756422, "grad_norm": 0.5968171954154968, "learning_rate": 4.50210378681627e-06, "loss": 0.5731, "step": 321 }, { "epoch": 0.13561701530254106, "grad_norm": 0.5271232724189758, "learning_rate": 4.516129032258065e-06, "loss": 0.5159, "step": 322 }, { "epoch": 0.1360381861575179, "grad_norm": 0.5284804701805115, "learning_rate": 4.5301542776998604e-06, "loss": 0.549, "step": 323 }, { "epoch": 0.13645935701249473, "grad_norm": 0.6073772311210632, "learning_rate": 4.544179523141655e-06, "loss": 0.5217, "step": 324 }, { "epoch": 0.13688052786747157, "grad_norm": 0.542488157749176, "learning_rate": 4.558204768583451e-06, "loss": 0.5184, "step": 325 }, { "epoch": 0.1373016987224484, "grad_norm": 0.5452144145965576, "learning_rate": 4.572230014025246e-06, "loss": 0.5414, "step": 326 }, { "epoch": 0.13772286957742524, "grad_norm": 0.590615451335907, "learning_rate": 4.586255259467041e-06, "loss": 0.5515, "step": 327 }, { "epoch": 0.13814404043240208, "grad_norm": 0.49694594740867615, "learning_rate": 4.600280504908836e-06, "loss": 0.5163, "step": 328 }, { "epoch": 0.13856521128737892, "grad_norm": 0.5827462077140808, "learning_rate": 4.614305750350632e-06, "loss": 0.5828, "step": 329 }, { "epoch": 0.13898638214235576, "grad_norm": 0.5243639945983887, "learning_rate": 4.6283309957924265e-06, "loss": 0.5023, "step": 330 }, { "epoch": 0.1394075529973326, "grad_norm": 0.5570120215415955, "learning_rate": 4.642356241234222e-06, "loss": 0.5189, "step": 331 }, { "epoch": 0.13982872385230943, "grad_norm": 0.5266404747962952, "learning_rate": 4.656381486676017e-06, "loss": 0.567, "step": 332 }, { "epoch": 0.14024989470728624, "grad_norm": 0.543510913848877, "learning_rate": 4.670406732117813e-06, "loss": 0.528, "step": 333 }, { "epoch": 0.14067106556226308, "grad_norm": 0.6090495586395264, "learning_rate": 4.684431977559607e-06, "loss": 0.5698, "step": 334 }, { "epoch": 0.14109223641723992, "grad_norm": 0.5987915992736816, "learning_rate": 4.698457223001403e-06, "loss": 0.5167, "step": 335 }, { "epoch": 0.14151340727221676, "grad_norm": 0.5936709046363831, "learning_rate": 4.712482468443198e-06, "loss": 0.5129, "step": 336 }, { "epoch": 0.1419345781271936, "grad_norm": 0.633519172668457, "learning_rate": 4.7265077138849935e-06, "loss": 0.5336, "step": 337 }, { "epoch": 0.14235574898217043, "grad_norm": 0.5678297877311707, "learning_rate": 4.740532959326788e-06, "loss": 0.5434, "step": 338 }, { "epoch": 0.14277691983714727, "grad_norm": 0.6101937294006348, "learning_rate": 4.754558204768584e-06, "loss": 0.5615, "step": 339 }, { "epoch": 0.1431980906921241, "grad_norm": 0.586818516254425, "learning_rate": 4.768583450210379e-06, "loss": 0.5204, "step": 340 }, { "epoch": 0.14361926154710095, "grad_norm": 0.5664098262786865, "learning_rate": 4.782608695652174e-06, "loss": 0.4936, "step": 341 }, { "epoch": 0.14404043240207778, "grad_norm": 0.6275355219841003, "learning_rate": 4.796633941093969e-06, "loss": 0.4838, "step": 342 }, { "epoch": 0.14446160325705462, "grad_norm": 0.6292077302932739, "learning_rate": 4.810659186535765e-06, "loss": 0.5445, "step": 343 }, { "epoch": 0.14488277411203146, "grad_norm": 0.6907467246055603, "learning_rate": 4.8246844319775596e-06, "loss": 0.5476, "step": 344 }, { "epoch": 0.14530394496700827, "grad_norm": 0.6091658473014832, "learning_rate": 4.838709677419355e-06, "loss": 0.5047, "step": 345 }, { "epoch": 0.1457251158219851, "grad_norm": 0.5768958330154419, "learning_rate": 4.85273492286115e-06, "loss": 0.5102, "step": 346 }, { "epoch": 0.14614628667696195, "grad_norm": 0.6183321475982666, "learning_rate": 4.866760168302946e-06, "loss": 0.4977, "step": 347 }, { "epoch": 0.14656745753193878, "grad_norm": 0.5854082107543945, "learning_rate": 4.8807854137447404e-06, "loss": 0.5339, "step": 348 }, { "epoch": 0.14698862838691562, "grad_norm": 0.5457305908203125, "learning_rate": 4.894810659186536e-06, "loss": 0.5325, "step": 349 }, { "epoch": 0.14740979924189246, "grad_norm": 0.6354281902313232, "learning_rate": 4.908835904628331e-06, "loss": 0.5201, "step": 350 }, { "epoch": 0.1478309700968693, "grad_norm": 0.6330921053886414, "learning_rate": 4.9228611500701265e-06, "loss": 0.5192, "step": 351 }, { "epoch": 0.14825214095184613, "grad_norm": 0.6173386573791504, "learning_rate": 4.936886395511922e-06, "loss": 0.5699, "step": 352 }, { "epoch": 0.14867331180682297, "grad_norm": 0.6057515740394592, "learning_rate": 4.950911640953717e-06, "loss": 0.5022, "step": 353 }, { "epoch": 0.1490944826617998, "grad_norm": 0.5754292607307434, "learning_rate": 4.964936886395513e-06, "loss": 0.5314, "step": 354 }, { "epoch": 0.14951565351677665, "grad_norm": 0.6179988384246826, "learning_rate": 4.978962131837307e-06, "loss": 0.5544, "step": 355 }, { "epoch": 0.14993682437175349, "grad_norm": 0.5762883424758911, "learning_rate": 4.992987377279103e-06, "loss": 0.5457, "step": 356 }, { "epoch": 0.15035799522673032, "grad_norm": 0.5649310350418091, "learning_rate": 5.007012622720898e-06, "loss": 0.5466, "step": 357 }, { "epoch": 0.15077916608170713, "grad_norm": 0.5899968147277832, "learning_rate": 5.0210378681626934e-06, "loss": 0.5392, "step": 358 }, { "epoch": 0.15120033693668397, "grad_norm": 0.6056661009788513, "learning_rate": 5.035063113604488e-06, "loss": 0.5441, "step": 359 }, { "epoch": 0.1516215077916608, "grad_norm": 0.538449227809906, "learning_rate": 5.049088359046284e-06, "loss": 0.5227, "step": 360 }, { "epoch": 0.15204267864663765, "grad_norm": 0.5879290103912354, "learning_rate": 5.063113604488079e-06, "loss": 0.5499, "step": 361 }, { "epoch": 0.15246384950161448, "grad_norm": 0.5964445471763611, "learning_rate": 5.077138849929874e-06, "loss": 0.5612, "step": 362 }, { "epoch": 0.15288502035659132, "grad_norm": 0.5882713198661804, "learning_rate": 5.091164095371669e-06, "loss": 0.5474, "step": 363 }, { "epoch": 0.15330619121156816, "grad_norm": 0.5620375871658325, "learning_rate": 5.105189340813465e-06, "loss": 0.5497, "step": 364 }, { "epoch": 0.153727362066545, "grad_norm": 0.5413060188293457, "learning_rate": 5.11921458625526e-06, "loss": 0.5079, "step": 365 }, { "epoch": 0.15414853292152184, "grad_norm": 0.5801294445991516, "learning_rate": 5.133239831697055e-06, "loss": 0.5437, "step": 366 }, { "epoch": 0.15456970377649867, "grad_norm": 0.6140326857566833, "learning_rate": 5.14726507713885e-06, "loss": 0.5347, "step": 367 }, { "epoch": 0.1549908746314755, "grad_norm": 0.5649505853652954, "learning_rate": 5.161290322580646e-06, "loss": 0.5658, "step": 368 }, { "epoch": 0.15541204548645235, "grad_norm": 0.6298269033432007, "learning_rate": 5.175315568022441e-06, "loss": 0.5685, "step": 369 }, { "epoch": 0.15583321634142916, "grad_norm": 0.5043066143989563, "learning_rate": 5.189340813464236e-06, "loss": 0.5188, "step": 370 }, { "epoch": 0.156254387196406, "grad_norm": 0.6012574434280396, "learning_rate": 5.203366058906031e-06, "loss": 0.5281, "step": 371 }, { "epoch": 0.15667555805138283, "grad_norm": 0.6314870119094849, "learning_rate": 5.2173913043478265e-06, "loss": 0.5204, "step": 372 }, { "epoch": 0.15709672890635967, "grad_norm": 0.5233169794082642, "learning_rate": 5.231416549789622e-06, "loss": 0.5609, "step": 373 }, { "epoch": 0.1575178997613365, "grad_norm": 0.593696653842926, "learning_rate": 5.245441795231418e-06, "loss": 0.5356, "step": 374 }, { "epoch": 0.15793907061631335, "grad_norm": 0.5947218537330627, "learning_rate": 5.259467040673212e-06, "loss": 0.508, "step": 375 }, { "epoch": 0.1583602414712902, "grad_norm": 0.5978935360908508, "learning_rate": 5.273492286115007e-06, "loss": 0.5188, "step": 376 }, { "epoch": 0.15878141232626702, "grad_norm": 0.5975727438926697, "learning_rate": 5.287517531556803e-06, "loss": 0.4975, "step": 377 }, { "epoch": 0.15920258318124386, "grad_norm": 0.5700645446777344, "learning_rate": 5.301542776998599e-06, "loss": 0.5492, "step": 378 }, { "epoch": 0.1596237540362207, "grad_norm": 0.5721471905708313, "learning_rate": 5.3155680224403926e-06, "loss": 0.509, "step": 379 }, { "epoch": 0.16004492489119754, "grad_norm": 0.5330876708030701, "learning_rate": 5.329593267882188e-06, "loss": 0.4938, "step": 380 }, { "epoch": 0.16046609574617438, "grad_norm": 0.5589953660964966, "learning_rate": 5.343618513323984e-06, "loss": 0.5117, "step": 381 }, { "epoch": 0.1608872666011512, "grad_norm": 0.6808726191520691, "learning_rate": 5.3576437587657795e-06, "loss": 0.5175, "step": 382 }, { "epoch": 0.16130843745612802, "grad_norm": 0.5649275183677673, "learning_rate": 5.3716690042075734e-06, "loss": 0.5547, "step": 383 }, { "epoch": 0.16172960831110486, "grad_norm": 0.5099338293075562, "learning_rate": 5.385694249649369e-06, "loss": 0.5118, "step": 384 }, { "epoch": 0.1621507791660817, "grad_norm": 0.5739375352859497, "learning_rate": 5.399719495091165e-06, "loss": 0.5831, "step": 385 }, { "epoch": 0.16257195002105854, "grad_norm": 0.583167314529419, "learning_rate": 5.41374474053296e-06, "loss": 0.5391, "step": 386 }, { "epoch": 0.16299312087603537, "grad_norm": 0.6409912705421448, "learning_rate": 5.427769985974754e-06, "loss": 0.5301, "step": 387 }, { "epoch": 0.1634142917310122, "grad_norm": 0.5747987031936646, "learning_rate": 5.44179523141655e-06, "loss": 0.5212, "step": 388 }, { "epoch": 0.16383546258598905, "grad_norm": 0.5741598010063171, "learning_rate": 5.455820476858346e-06, "loss": 0.5404, "step": 389 }, { "epoch": 0.1642566334409659, "grad_norm": 0.6030471920967102, "learning_rate": 5.469845722300141e-06, "loss": 0.5079, "step": 390 }, { "epoch": 0.16467780429594273, "grad_norm": 0.5745446085929871, "learning_rate": 5.483870967741935e-06, "loss": 0.5492, "step": 391 }, { "epoch": 0.16509897515091956, "grad_norm": 0.5105449557304382, "learning_rate": 5.497896213183731e-06, "loss": 0.4769, "step": 392 }, { "epoch": 0.1655201460058964, "grad_norm": 0.5367932915687561, "learning_rate": 5.5119214586255264e-06, "loss": 0.5394, "step": 393 }, { "epoch": 0.16594131686087324, "grad_norm": 0.5448702573776245, "learning_rate": 5.525946704067322e-06, "loss": 0.4812, "step": 394 }, { "epoch": 0.16636248771585008, "grad_norm": 0.5656373500823975, "learning_rate": 5.539971949509116e-06, "loss": 0.5179, "step": 395 }, { "epoch": 0.1667836585708269, "grad_norm": 0.5118184089660645, "learning_rate": 5.553997194950912e-06, "loss": 0.5013, "step": 396 }, { "epoch": 0.16720482942580372, "grad_norm": 0.5639985203742981, "learning_rate": 5.568022440392707e-06, "loss": 0.4797, "step": 397 }, { "epoch": 0.16762600028078056, "grad_norm": 0.5634037256240845, "learning_rate": 5.582047685834503e-06, "loss": 0.5332, "step": 398 }, { "epoch": 0.1680471711357574, "grad_norm": 0.5385947823524475, "learning_rate": 5.596072931276298e-06, "loss": 0.5108, "step": 399 }, { "epoch": 0.16846834199073424, "grad_norm": 0.6148417592048645, "learning_rate": 5.6100981767180925e-06, "loss": 0.5198, "step": 400 }, { "epoch": 0.16888951284571108, "grad_norm": 0.5938084125518799, "learning_rate": 5.624123422159888e-06, "loss": 0.5091, "step": 401 }, { "epoch": 0.16931068370068791, "grad_norm": 0.6238675713539124, "learning_rate": 5.638148667601684e-06, "loss": 0.5209, "step": 402 }, { "epoch": 0.16973185455566475, "grad_norm": 0.5981119275093079, "learning_rate": 5.652173913043479e-06, "loss": 0.5223, "step": 403 }, { "epoch": 0.1701530254106416, "grad_norm": 0.6542584896087646, "learning_rate": 5.666199158485273e-06, "loss": 0.5613, "step": 404 }, { "epoch": 0.17057419626561843, "grad_norm": 0.650939404964447, "learning_rate": 5.680224403927069e-06, "loss": 0.5387, "step": 405 }, { "epoch": 0.17099536712059527, "grad_norm": 0.5631885528564453, "learning_rate": 5.694249649368865e-06, "loss": 0.5258, "step": 406 }, { "epoch": 0.1714165379755721, "grad_norm": 0.5457655191421509, "learning_rate": 5.7082748948106595e-06, "loss": 0.5435, "step": 407 }, { "epoch": 0.1718377088305489, "grad_norm": 0.6297253370285034, "learning_rate": 5.722300140252455e-06, "loss": 0.5579, "step": 408 }, { "epoch": 0.17225887968552575, "grad_norm": 0.5665075182914734, "learning_rate": 5.73632538569425e-06, "loss": 0.5469, "step": 409 }, { "epoch": 0.1726800505405026, "grad_norm": 0.5914989709854126, "learning_rate": 5.7503506311360456e-06, "loss": 0.5355, "step": 410 }, { "epoch": 0.17310122139547943, "grad_norm": 0.5828976035118103, "learning_rate": 5.76437587657784e-06, "loss": 0.5647, "step": 411 }, { "epoch": 0.17352239225045626, "grad_norm": 0.604396402835846, "learning_rate": 5.778401122019636e-06, "loss": 0.5321, "step": 412 }, { "epoch": 0.1739435631054331, "grad_norm": 0.5480253100395203, "learning_rate": 5.792426367461431e-06, "loss": 0.5113, "step": 413 }, { "epoch": 0.17436473396040994, "grad_norm": 0.5508501529693604, "learning_rate": 5.806451612903226e-06, "loss": 0.4876, "step": 414 }, { "epoch": 0.17478590481538678, "grad_norm": 0.5806002020835876, "learning_rate": 5.820476858345021e-06, "loss": 0.5452, "step": 415 }, { "epoch": 0.17520707567036362, "grad_norm": 0.6215909123420715, "learning_rate": 5.834502103786817e-06, "loss": 0.5208, "step": 416 }, { "epoch": 0.17562824652534045, "grad_norm": 0.5995006561279297, "learning_rate": 5.8485273492286125e-06, "loss": 0.5016, "step": 417 }, { "epoch": 0.1760494173803173, "grad_norm": 0.59804368019104, "learning_rate": 5.862552594670407e-06, "loss": 0.5375, "step": 418 }, { "epoch": 0.17647058823529413, "grad_norm": 0.5603466629981995, "learning_rate": 5.876577840112202e-06, "loss": 0.5326, "step": 419 }, { "epoch": 0.17689175909027097, "grad_norm": 0.6054261326789856, "learning_rate": 5.890603085553998e-06, "loss": 0.4955, "step": 420 }, { "epoch": 0.17731292994524778, "grad_norm": 0.662607729434967, "learning_rate": 5.904628330995793e-06, "loss": 0.5343, "step": 421 }, { "epoch": 0.17773410080022461, "grad_norm": 0.6148008704185486, "learning_rate": 5.918653576437588e-06, "loss": 0.5388, "step": 422 }, { "epoch": 0.17815527165520145, "grad_norm": 0.5996546149253845, "learning_rate": 5.932678821879383e-06, "loss": 0.5329, "step": 423 }, { "epoch": 0.1785764425101783, "grad_norm": 0.6458231210708618, "learning_rate": 5.946704067321179e-06, "loss": 0.5265, "step": 424 }, { "epoch": 0.17899761336515513, "grad_norm": 0.6888505220413208, "learning_rate": 5.960729312762974e-06, "loss": 0.5388, "step": 425 }, { "epoch": 0.17941878422013197, "grad_norm": 0.5788480043411255, "learning_rate": 5.97475455820477e-06, "loss": 0.4928, "step": 426 }, { "epoch": 0.1798399550751088, "grad_norm": 0.6998277306556702, "learning_rate": 5.988779803646564e-06, "loss": 0.548, "step": 427 }, { "epoch": 0.18026112593008564, "grad_norm": 0.6664361953735352, "learning_rate": 6.0028050490883594e-06, "loss": 0.4908, "step": 428 }, { "epoch": 0.18068229678506248, "grad_norm": 0.6013952493667603, "learning_rate": 6.016830294530155e-06, "loss": 0.4968, "step": 429 }, { "epoch": 0.18110346764003932, "grad_norm": 0.636076807975769, "learning_rate": 6.030855539971951e-06, "loss": 0.5284, "step": 430 }, { "epoch": 0.18152463849501616, "grad_norm": 0.5895690321922302, "learning_rate": 6.044880785413745e-06, "loss": 0.5112, "step": 431 }, { "epoch": 0.181945809349993, "grad_norm": 0.6102212071418762, "learning_rate": 6.05890603085554e-06, "loss": 0.5343, "step": 432 }, { "epoch": 0.1823669802049698, "grad_norm": 0.5380189418792725, "learning_rate": 6.072931276297336e-06, "loss": 0.5111, "step": 433 }, { "epoch": 0.18278815105994664, "grad_norm": 0.5786718130111694, "learning_rate": 6.086956521739132e-06, "loss": 0.5094, "step": 434 }, { "epoch": 0.18320932191492348, "grad_norm": 0.6232675313949585, "learning_rate": 6.1009817671809255e-06, "loss": 0.5434, "step": 435 }, { "epoch": 0.18363049276990032, "grad_norm": 0.6569251418113708, "learning_rate": 6.115007012622721e-06, "loss": 0.504, "step": 436 }, { "epoch": 0.18405166362487715, "grad_norm": 0.6095144152641296, "learning_rate": 6.129032258064517e-06, "loss": 0.5924, "step": 437 }, { "epoch": 0.184472834479854, "grad_norm": 0.5751181244850159, "learning_rate": 6.1430575035063125e-06, "loss": 0.5188, "step": 438 }, { "epoch": 0.18489400533483083, "grad_norm": 0.599271833896637, "learning_rate": 6.157082748948106e-06, "loss": 0.5432, "step": 439 }, { "epoch": 0.18531517618980767, "grad_norm": 0.628640353679657, "learning_rate": 6.171107994389902e-06, "loss": 0.5225, "step": 440 }, { "epoch": 0.1857363470447845, "grad_norm": 0.6243789792060852, "learning_rate": 6.185133239831698e-06, "loss": 0.596, "step": 441 }, { "epoch": 0.18615751789976134, "grad_norm": 0.5584890842437744, "learning_rate": 6.199158485273493e-06, "loss": 0.4924, "step": 442 }, { "epoch": 0.18657868875473818, "grad_norm": 0.6124101281166077, "learning_rate": 6.213183730715287e-06, "loss": 0.5215, "step": 443 }, { "epoch": 0.18699985960971502, "grad_norm": 0.5538225769996643, "learning_rate": 6.227208976157083e-06, "loss": 0.5208, "step": 444 }, { "epoch": 0.18742103046469186, "grad_norm": 0.5956318974494934, "learning_rate": 6.2412342215988786e-06, "loss": 0.57, "step": 445 }, { "epoch": 0.18784220131966867, "grad_norm": 0.509066104888916, "learning_rate": 6.255259467040674e-06, "loss": 0.5124, "step": 446 }, { "epoch": 0.1882633721746455, "grad_norm": 0.5567045211791992, "learning_rate": 6.269284712482468e-06, "loss": 0.5479, "step": 447 }, { "epoch": 0.18868454302962234, "grad_norm": 0.6536104679107666, "learning_rate": 6.283309957924264e-06, "loss": 0.494, "step": 448 }, { "epoch": 0.18910571388459918, "grad_norm": 0.5394294857978821, "learning_rate": 6.297335203366059e-06, "loss": 0.5585, "step": 449 }, { "epoch": 0.18952688473957602, "grad_norm": 0.5451724529266357, "learning_rate": 6.311360448807855e-06, "loss": 0.5443, "step": 450 }, { "epoch": 0.18994805559455286, "grad_norm": 0.5586307644844055, "learning_rate": 6.32538569424965e-06, "loss": 0.5033, "step": 451 }, { "epoch": 0.1903692264495297, "grad_norm": 0.57188481092453, "learning_rate": 6.339410939691445e-06, "loss": 0.5147, "step": 452 }, { "epoch": 0.19079039730450653, "grad_norm": 0.5099505186080933, "learning_rate": 6.35343618513324e-06, "loss": 0.5373, "step": 453 }, { "epoch": 0.19121156815948337, "grad_norm": 0.5432437062263489, "learning_rate": 6.367461430575036e-06, "loss": 0.505, "step": 454 }, { "epoch": 0.1916327390144602, "grad_norm": 0.5339857935905457, "learning_rate": 6.381486676016831e-06, "loss": 0.5212, "step": 455 }, { "epoch": 0.19205390986943705, "grad_norm": 0.5308710932731628, "learning_rate": 6.3955119214586255e-06, "loss": 0.5004, "step": 456 }, { "epoch": 0.19247508072441388, "grad_norm": 0.46928709745407104, "learning_rate": 6.409537166900421e-06, "loss": 0.5092, "step": 457 }, { "epoch": 0.1928962515793907, "grad_norm": 0.5794248580932617, "learning_rate": 6.423562412342217e-06, "loss": 0.517, "step": 458 }, { "epoch": 0.19331742243436753, "grad_norm": 0.5803227424621582, "learning_rate": 6.437587657784012e-06, "loss": 0.5198, "step": 459 }, { "epoch": 0.19373859328934437, "grad_norm": 0.5212175846099854, "learning_rate": 6.451612903225806e-06, "loss": 0.5246, "step": 460 }, { "epoch": 0.1941597641443212, "grad_norm": 0.5324025750160217, "learning_rate": 6.465638148667602e-06, "loss": 0.4969, "step": 461 }, { "epoch": 0.19458093499929804, "grad_norm": 0.5842528343200684, "learning_rate": 6.479663394109398e-06, "loss": 0.541, "step": 462 }, { "epoch": 0.19500210585427488, "grad_norm": 0.5760007500648499, "learning_rate": 6.4936886395511925e-06, "loss": 0.5735, "step": 463 }, { "epoch": 0.19542327670925172, "grad_norm": 0.5553972721099854, "learning_rate": 6.507713884992988e-06, "loss": 0.5201, "step": 464 }, { "epoch": 0.19584444756422856, "grad_norm": 0.6087374687194824, "learning_rate": 6.521739130434783e-06, "loss": 0.5264, "step": 465 }, { "epoch": 0.1962656184192054, "grad_norm": 0.5486700534820557, "learning_rate": 6.5357643758765785e-06, "loss": 0.5131, "step": 466 }, { "epoch": 0.19668678927418223, "grad_norm": 0.630050539970398, "learning_rate": 6.549789621318373e-06, "loss": 0.5541, "step": 467 }, { "epoch": 0.19710796012915907, "grad_norm": 0.7077353596687317, "learning_rate": 6.563814866760169e-06, "loss": 0.5641, "step": 468 }, { "epoch": 0.1975291309841359, "grad_norm": 0.6196013689041138, "learning_rate": 6.577840112201964e-06, "loss": 0.5454, "step": 469 }, { "epoch": 0.19795030183911275, "grad_norm": 0.5843043327331543, "learning_rate": 6.591865357643759e-06, "loss": 0.5284, "step": 470 }, { "epoch": 0.19837147269408956, "grad_norm": 0.6019142270088196, "learning_rate": 6.605890603085554e-06, "loss": 0.5009, "step": 471 }, { "epoch": 0.1987926435490664, "grad_norm": 0.625191330909729, "learning_rate": 6.61991584852735e-06, "loss": 0.5221, "step": 472 }, { "epoch": 0.19921381440404323, "grad_norm": 0.633883535861969, "learning_rate": 6.6339410939691455e-06, "loss": 0.5301, "step": 473 }, { "epoch": 0.19963498525902007, "grad_norm": 0.5880273580551147, "learning_rate": 6.64796633941094e-06, "loss": 0.4999, "step": 474 }, { "epoch": 0.2000561561139969, "grad_norm": 0.6648816466331482, "learning_rate": 6.661991584852735e-06, "loss": 0.536, "step": 475 }, { "epoch": 0.20047732696897375, "grad_norm": 0.5558480620384216, "learning_rate": 6.676016830294531e-06, "loss": 0.5348, "step": 476 }, { "epoch": 0.20089849782395058, "grad_norm": 0.6763182878494263, "learning_rate": 6.690042075736326e-06, "loss": 0.5145, "step": 477 }, { "epoch": 0.20131966867892742, "grad_norm": 0.6273434162139893, "learning_rate": 6.704067321178121e-06, "loss": 0.5788, "step": 478 }, { "epoch": 0.20174083953390426, "grad_norm": 0.5827475786209106, "learning_rate": 6.718092566619916e-06, "loss": 0.5284, "step": 479 }, { "epoch": 0.2021620103888811, "grad_norm": 0.5787315964698792, "learning_rate": 6.7321178120617116e-06, "loss": 0.5004, "step": 480 }, { "epoch": 0.20258318124385793, "grad_norm": 0.5381258726119995, "learning_rate": 6.746143057503507e-06, "loss": 0.5284, "step": 481 }, { "epoch": 0.20300435209883477, "grad_norm": 0.6229107975959778, "learning_rate": 6.760168302945303e-06, "loss": 0.5076, "step": 482 }, { "epoch": 0.20342552295381158, "grad_norm": 0.5679491758346558, "learning_rate": 6.774193548387097e-06, "loss": 0.5402, "step": 483 }, { "epoch": 0.20384669380878842, "grad_norm": 0.562599241733551, "learning_rate": 6.788218793828892e-06, "loss": 0.5149, "step": 484 }, { "epoch": 0.20426786466376526, "grad_norm": 0.5992795825004578, "learning_rate": 6.802244039270688e-06, "loss": 0.5236, "step": 485 }, { "epoch": 0.2046890355187421, "grad_norm": 0.5924989581108093, "learning_rate": 6.816269284712484e-06, "loss": 0.5062, "step": 486 }, { "epoch": 0.20511020637371893, "grad_norm": 0.5973644256591797, "learning_rate": 6.830294530154278e-06, "loss": 0.5102, "step": 487 }, { "epoch": 0.20553137722869577, "grad_norm": 0.57747882604599, "learning_rate": 6.844319775596073e-06, "loss": 0.5161, "step": 488 }, { "epoch": 0.2059525480836726, "grad_norm": 0.5788319110870361, "learning_rate": 6.858345021037869e-06, "loss": 0.5298, "step": 489 }, { "epoch": 0.20637371893864945, "grad_norm": 0.5964310169219971, "learning_rate": 6.8723702664796646e-06, "loss": 0.4936, "step": 490 }, { "epoch": 0.20679488979362629, "grad_norm": 0.5509951114654541, "learning_rate": 6.8863955119214585e-06, "loss": 0.5226, "step": 491 }, { "epoch": 0.20721606064860312, "grad_norm": 0.5542705059051514, "learning_rate": 6.900420757363254e-06, "loss": 0.544, "step": 492 }, { "epoch": 0.20763723150357996, "grad_norm": 0.5117254853248596, "learning_rate": 6.91444600280505e-06, "loss": 0.5098, "step": 493 }, { "epoch": 0.2080584023585568, "grad_norm": 0.6719217896461487, "learning_rate": 6.9284712482468454e-06, "loss": 0.5674, "step": 494 }, { "epoch": 0.20847957321353364, "grad_norm": 0.5323203802108765, "learning_rate": 6.942496493688639e-06, "loss": 0.5403, "step": 495 }, { "epoch": 0.20890074406851045, "grad_norm": 0.5797265768051147, "learning_rate": 6.956521739130435e-06, "loss": 0.5218, "step": 496 }, { "epoch": 0.20932191492348728, "grad_norm": 0.5928699970245361, "learning_rate": 6.970546984572231e-06, "loss": 0.5498, "step": 497 }, { "epoch": 0.20974308577846412, "grad_norm": 0.5383062362670898, "learning_rate": 6.984572230014026e-06, "loss": 0.5101, "step": 498 }, { "epoch": 0.21016425663344096, "grad_norm": 0.48087573051452637, "learning_rate": 6.99859747545582e-06, "loss": 0.4846, "step": 499 }, { "epoch": 0.2105854274884178, "grad_norm": 0.6221263408660889, "learning_rate": 7.012622720897616e-06, "loss": 0.5086, "step": 500 }, { "epoch": 0.21100659834339464, "grad_norm": 0.5584067702293396, "learning_rate": 7.0266479663394115e-06, "loss": 0.5115, "step": 501 }, { "epoch": 0.21142776919837147, "grad_norm": 0.5110412836074829, "learning_rate": 7.040673211781207e-06, "loss": 0.5093, "step": 502 }, { "epoch": 0.2118489400533483, "grad_norm": 0.537513017654419, "learning_rate": 7.054698457223001e-06, "loss": 0.487, "step": 503 }, { "epoch": 0.21227011090832515, "grad_norm": 0.5929871797561646, "learning_rate": 7.068723702664797e-06, "loss": 0.5432, "step": 504 }, { "epoch": 0.212691281763302, "grad_norm": 0.6055575609207153, "learning_rate": 7.082748948106592e-06, "loss": 0.5501, "step": 505 }, { "epoch": 0.21311245261827882, "grad_norm": 0.5509254932403564, "learning_rate": 7.096774193548388e-06, "loss": 0.4834, "step": 506 }, { "epoch": 0.21353362347325566, "grad_norm": 0.5604844093322754, "learning_rate": 7.110799438990183e-06, "loss": 0.5174, "step": 507 }, { "epoch": 0.21395479432823247, "grad_norm": 0.6133124232292175, "learning_rate": 7.124824684431978e-06, "loss": 0.5378, "step": 508 }, { "epoch": 0.2143759651832093, "grad_norm": 0.676546573638916, "learning_rate": 7.138849929873773e-06, "loss": 0.5873, "step": 509 }, { "epoch": 0.21479713603818615, "grad_norm": 0.5891737937927246, "learning_rate": 7.152875175315569e-06, "loss": 0.579, "step": 510 }, { "epoch": 0.215218306893163, "grad_norm": 0.6467000842094421, "learning_rate": 7.166900420757364e-06, "loss": 0.5254, "step": 511 }, { "epoch": 0.21563947774813982, "grad_norm": 0.628291666507721, "learning_rate": 7.1809256661991585e-06, "loss": 0.546, "step": 512 }, { "epoch": 0.21606064860311666, "grad_norm": 0.5579304695129395, "learning_rate": 7.194950911640954e-06, "loss": 0.5051, "step": 513 }, { "epoch": 0.2164818194580935, "grad_norm": 0.5346967577934265, "learning_rate": 7.20897615708275e-06, "loss": 0.4969, "step": 514 }, { "epoch": 0.21690299031307034, "grad_norm": 0.6628580689430237, "learning_rate": 7.2230014025245446e-06, "loss": 0.518, "step": 515 }, { "epoch": 0.21732416116804718, "grad_norm": 0.5667567253112793, "learning_rate": 7.23702664796634e-06, "loss": 0.4913, "step": 516 }, { "epoch": 0.217745332023024, "grad_norm": 0.6033014059066772, "learning_rate": 7.251051893408135e-06, "loss": 0.5253, "step": 517 }, { "epoch": 0.21816650287800085, "grad_norm": 0.5692185759544373, "learning_rate": 7.265077138849931e-06, "loss": 0.4763, "step": 518 }, { "epoch": 0.2185876737329777, "grad_norm": 0.5536398887634277, "learning_rate": 7.2791023842917254e-06, "loss": 0.5178, "step": 519 }, { "epoch": 0.21900884458795453, "grad_norm": 0.560522198677063, "learning_rate": 7.293127629733521e-06, "loss": 0.5274, "step": 520 }, { "epoch": 0.21943001544293134, "grad_norm": 0.5162234902381897, "learning_rate": 7.307152875175316e-06, "loss": 0.5124, "step": 521 }, { "epoch": 0.21985118629790817, "grad_norm": 0.5934609770774841, "learning_rate": 7.3211781206171115e-06, "loss": 0.556, "step": 522 }, { "epoch": 0.220272357152885, "grad_norm": 0.6036375761032104, "learning_rate": 7.335203366058906e-06, "loss": 0.5564, "step": 523 }, { "epoch": 0.22069352800786185, "grad_norm": 0.5756515264511108, "learning_rate": 7.349228611500702e-06, "loss": 0.5179, "step": 524 }, { "epoch": 0.2211146988628387, "grad_norm": 0.5522965788841248, "learning_rate": 7.3632538569424976e-06, "loss": 0.5178, "step": 525 }, { "epoch": 0.22153586971781553, "grad_norm": 0.5128467679023743, "learning_rate": 7.377279102384292e-06, "loss": 0.5039, "step": 526 }, { "epoch": 0.22195704057279236, "grad_norm": 0.592068076133728, "learning_rate": 7.391304347826087e-06, "loss": 0.5457, "step": 527 }, { "epoch": 0.2223782114277692, "grad_norm": 0.5722931027412415, "learning_rate": 7.405329593267883e-06, "loss": 0.5067, "step": 528 }, { "epoch": 0.22279938228274604, "grad_norm": 0.5954606533050537, "learning_rate": 7.4193548387096784e-06, "loss": 0.5427, "step": 529 }, { "epoch": 0.22322055313772288, "grad_norm": 0.5335581302642822, "learning_rate": 7.433380084151473e-06, "loss": 0.5202, "step": 530 }, { "epoch": 0.22364172399269971, "grad_norm": 0.5604815483093262, "learning_rate": 7.447405329593268e-06, "loss": 0.514, "step": 531 }, { "epoch": 0.22406289484767655, "grad_norm": 0.5952799320220947, "learning_rate": 7.461430575035064e-06, "loss": 0.4857, "step": 532 }, { "epoch": 0.22448406570265336, "grad_norm": 0.5768789649009705, "learning_rate": 7.475455820476859e-06, "loss": 0.484, "step": 533 }, { "epoch": 0.2249052365576302, "grad_norm": 0.5580201148986816, "learning_rate": 7.489481065918655e-06, "loss": 0.5033, "step": 534 }, { "epoch": 0.22532640741260704, "grad_norm": 0.67402583360672, "learning_rate": 7.503506311360449e-06, "loss": 0.5632, "step": 535 }, { "epoch": 0.22574757826758388, "grad_norm": 0.5812245607376099, "learning_rate": 7.5175315568022445e-06, "loss": 0.4953, "step": 536 }, { "epoch": 0.2261687491225607, "grad_norm": 0.6200184226036072, "learning_rate": 7.53155680224404e-06, "loss": 0.554, "step": 537 }, { "epoch": 0.22658991997753755, "grad_norm": 0.706803560256958, "learning_rate": 7.545582047685836e-06, "loss": 0.5464, "step": 538 }, { "epoch": 0.2270110908325144, "grad_norm": 0.5465744137763977, "learning_rate": 7.55960729312763e-06, "loss": 0.4624, "step": 539 }, { "epoch": 0.22743226168749123, "grad_norm": 0.6101973056793213, "learning_rate": 7.573632538569425e-06, "loss": 0.5234, "step": 540 }, { "epoch": 0.22785343254246807, "grad_norm": 0.560723602771759, "learning_rate": 7.587657784011221e-06, "loss": 0.5083, "step": 541 }, { "epoch": 0.2282746033974449, "grad_norm": 0.5972851514816284, "learning_rate": 7.601683029453017e-06, "loss": 0.4978, "step": 542 }, { "epoch": 0.22869577425242174, "grad_norm": 0.635891318321228, "learning_rate": 7.615708274894811e-06, "loss": 0.5168, "step": 543 }, { "epoch": 0.22911694510739858, "grad_norm": 0.5337158441543579, "learning_rate": 7.629733520336606e-06, "loss": 0.4851, "step": 544 }, { "epoch": 0.22953811596237542, "grad_norm": 0.58110111951828, "learning_rate": 7.643758765778401e-06, "loss": 0.5284, "step": 545 }, { "epoch": 0.22995928681735223, "grad_norm": 0.5700879096984863, "learning_rate": 7.657784011220198e-06, "loss": 0.4791, "step": 546 }, { "epoch": 0.23038045767232906, "grad_norm": 0.5719430446624756, "learning_rate": 7.671809256661992e-06, "loss": 0.542, "step": 547 }, { "epoch": 0.2308016285273059, "grad_norm": 0.5459939241409302, "learning_rate": 7.685834502103787e-06, "loss": 0.5009, "step": 548 }, { "epoch": 0.23122279938228274, "grad_norm": 0.48539999127388, "learning_rate": 7.699859747545582e-06, "loss": 0.4957, "step": 549 }, { "epoch": 0.23164397023725958, "grad_norm": 0.6014862060546875, "learning_rate": 7.713884992987378e-06, "loss": 0.5503, "step": 550 }, { "epoch": 0.23206514109223642, "grad_norm": 0.526176393032074, "learning_rate": 7.727910238429173e-06, "loss": 0.4995, "step": 551 }, { "epoch": 0.23248631194721325, "grad_norm": 0.5522193312644958, "learning_rate": 7.741935483870968e-06, "loss": 0.5448, "step": 552 }, { "epoch": 0.2329074828021901, "grad_norm": 0.48734238743782043, "learning_rate": 7.755960729312764e-06, "loss": 0.5191, "step": 553 }, { "epoch": 0.23332865365716693, "grad_norm": 0.48238134384155273, "learning_rate": 7.76998597475456e-06, "loss": 0.5074, "step": 554 }, { "epoch": 0.23374982451214377, "grad_norm": 0.5777407288551331, "learning_rate": 7.784011220196354e-06, "loss": 0.5028, "step": 555 }, { "epoch": 0.2341709953671206, "grad_norm": 0.5261020064353943, "learning_rate": 7.798036465638149e-06, "loss": 0.4698, "step": 556 }, { "epoch": 0.23459216622209744, "grad_norm": 0.5619714260101318, "learning_rate": 7.812061711079945e-06, "loss": 0.5339, "step": 557 }, { "epoch": 0.23501333707707428, "grad_norm": 0.5919780731201172, "learning_rate": 7.82608695652174e-06, "loss": 0.5513, "step": 558 }, { "epoch": 0.2354345079320511, "grad_norm": 0.6144738793373108, "learning_rate": 7.840112201963535e-06, "loss": 0.5522, "step": 559 }, { "epoch": 0.23585567878702793, "grad_norm": 0.5619809627532959, "learning_rate": 7.85413744740533e-06, "loss": 0.5182, "step": 560 }, { "epoch": 0.23627684964200477, "grad_norm": 0.5849460363388062, "learning_rate": 7.868162692847126e-06, "loss": 0.5362, "step": 561 }, { "epoch": 0.2366980204969816, "grad_norm": 0.6504635214805603, "learning_rate": 7.882187938288921e-06, "loss": 0.5395, "step": 562 }, { "epoch": 0.23711919135195844, "grad_norm": 0.5504747033119202, "learning_rate": 7.896213183730716e-06, "loss": 0.4984, "step": 563 }, { "epoch": 0.23754036220693528, "grad_norm": 0.5612363815307617, "learning_rate": 7.91023842917251e-06, "loss": 0.4934, "step": 564 }, { "epoch": 0.23796153306191212, "grad_norm": 0.5665110945701599, "learning_rate": 7.924263674614307e-06, "loss": 0.5309, "step": 565 }, { "epoch": 0.23838270391688895, "grad_norm": 0.5375802516937256, "learning_rate": 7.938288920056102e-06, "loss": 0.5033, "step": 566 }, { "epoch": 0.2388038747718658, "grad_norm": 0.5878860354423523, "learning_rate": 7.952314165497897e-06, "loss": 0.5076, "step": 567 }, { "epoch": 0.23922504562684263, "grad_norm": 0.5380121469497681, "learning_rate": 7.966339410939691e-06, "loss": 0.5249, "step": 568 }, { "epoch": 0.23964621648181947, "grad_norm": 0.5719384551048279, "learning_rate": 7.980364656381488e-06, "loss": 0.5014, "step": 569 }, { "epoch": 0.2400673873367963, "grad_norm": 0.6327529549598694, "learning_rate": 7.994389901823283e-06, "loss": 0.539, "step": 570 }, { "epoch": 0.24048855819177312, "grad_norm": 0.5738524794578552, "learning_rate": 8.008415147265078e-06, "loss": 0.5359, "step": 571 }, { "epoch": 0.24090972904674995, "grad_norm": 0.5455966591835022, "learning_rate": 8.022440392706872e-06, "loss": 0.5466, "step": 572 }, { "epoch": 0.2413308999017268, "grad_norm": 0.5996169447898865, "learning_rate": 8.036465638148669e-06, "loss": 0.5082, "step": 573 }, { "epoch": 0.24175207075670363, "grad_norm": 0.605817973613739, "learning_rate": 8.050490883590464e-06, "loss": 0.5706, "step": 574 }, { "epoch": 0.24217324161168047, "grad_norm": 0.5407811403274536, "learning_rate": 8.064516129032258e-06, "loss": 0.5012, "step": 575 }, { "epoch": 0.2425944124666573, "grad_norm": 0.627113401889801, "learning_rate": 8.078541374474053e-06, "loss": 0.4928, "step": 576 }, { "epoch": 0.24301558332163414, "grad_norm": 0.594451904296875, "learning_rate": 8.09256661991585e-06, "loss": 0.5242, "step": 577 }, { "epoch": 0.24343675417661098, "grad_norm": 0.5194963216781616, "learning_rate": 8.106591865357644e-06, "loss": 0.5032, "step": 578 }, { "epoch": 0.24385792503158782, "grad_norm": 0.5647953152656555, "learning_rate": 8.12061711079944e-06, "loss": 0.49, "step": 579 }, { "epoch": 0.24427909588656466, "grad_norm": 0.565341591835022, "learning_rate": 8.134642356241234e-06, "loss": 0.5421, "step": 580 }, { "epoch": 0.2447002667415415, "grad_norm": 0.559299886226654, "learning_rate": 8.14866760168303e-06, "loss": 0.5182, "step": 581 }, { "epoch": 0.24512143759651833, "grad_norm": 0.5549270510673523, "learning_rate": 8.162692847124825e-06, "loss": 0.5531, "step": 582 }, { "epoch": 0.24554260845149517, "grad_norm": 0.649933397769928, "learning_rate": 8.17671809256662e-06, "loss": 0.5283, "step": 583 }, { "epoch": 0.24596377930647198, "grad_norm": 0.5338761806488037, "learning_rate": 8.190743338008415e-06, "loss": 0.5044, "step": 584 }, { "epoch": 0.24638495016144882, "grad_norm": 0.5785558819770813, "learning_rate": 8.204768583450211e-06, "loss": 0.5007, "step": 585 }, { "epoch": 0.24680612101642566, "grad_norm": 0.6019935011863708, "learning_rate": 8.218793828892006e-06, "loss": 0.5318, "step": 586 }, { "epoch": 0.2472272918714025, "grad_norm": 0.6757105588912964, "learning_rate": 8.232819074333801e-06, "loss": 0.5554, "step": 587 }, { "epoch": 0.24764846272637933, "grad_norm": 0.5707047581672668, "learning_rate": 8.246844319775596e-06, "loss": 0.515, "step": 588 }, { "epoch": 0.24806963358135617, "grad_norm": 0.5231472253799438, "learning_rate": 8.260869565217392e-06, "loss": 0.5087, "step": 589 }, { "epoch": 0.248490804436333, "grad_norm": 0.6290871500968933, "learning_rate": 8.274894810659187e-06, "loss": 0.5145, "step": 590 }, { "epoch": 0.24891197529130984, "grad_norm": 0.6602106690406799, "learning_rate": 8.288920056100982e-06, "loss": 0.5165, "step": 591 }, { "epoch": 0.24933314614628668, "grad_norm": 0.6247860789299011, "learning_rate": 8.302945301542777e-06, "loss": 0.5098, "step": 592 }, { "epoch": 0.24975431700126352, "grad_norm": 0.606377124786377, "learning_rate": 8.316970546984573e-06, "loss": 0.5352, "step": 593 }, { "epoch": 0.25017548785624033, "grad_norm": 0.5930733680725098, "learning_rate": 8.330995792426368e-06, "loss": 0.4976, "step": 594 }, { "epoch": 0.25059665871121717, "grad_norm": 0.6751664876937866, "learning_rate": 8.345021037868163e-06, "loss": 0.5255, "step": 595 }, { "epoch": 0.251017829566194, "grad_norm": 0.5674278140068054, "learning_rate": 8.35904628330996e-06, "loss": 0.5265, "step": 596 }, { "epoch": 0.25143900042117084, "grad_norm": 0.5434824824333191, "learning_rate": 8.373071528751754e-06, "loss": 0.4943, "step": 597 }, { "epoch": 0.2518601712761477, "grad_norm": 0.6718891263008118, "learning_rate": 8.387096774193549e-06, "loss": 0.517, "step": 598 }, { "epoch": 0.2522813421311245, "grad_norm": 0.5734667181968689, "learning_rate": 8.401122019635344e-06, "loss": 0.4841, "step": 599 }, { "epoch": 0.25270251298610136, "grad_norm": 0.6257987022399902, "learning_rate": 8.41514726507714e-06, "loss": 0.5272, "step": 600 }, { "epoch": 0.2531236838410782, "grad_norm": 0.7105362415313721, "learning_rate": 8.429172510518935e-06, "loss": 0.4982, "step": 601 }, { "epoch": 0.25354485469605503, "grad_norm": 0.5420646071434021, "learning_rate": 8.44319775596073e-06, "loss": 0.5547, "step": 602 }, { "epoch": 0.25396602555103187, "grad_norm": 0.5157469511032104, "learning_rate": 8.457223001402524e-06, "loss": 0.5224, "step": 603 }, { "epoch": 0.2543871964060087, "grad_norm": 0.5659580230712891, "learning_rate": 8.471248246844321e-06, "loss": 0.5705, "step": 604 }, { "epoch": 0.25480836726098555, "grad_norm": 0.5548718571662903, "learning_rate": 8.485273492286116e-06, "loss": 0.5007, "step": 605 }, { "epoch": 0.2552295381159624, "grad_norm": 0.5591087341308594, "learning_rate": 8.49929873772791e-06, "loss": 0.4628, "step": 606 }, { "epoch": 0.2556507089709392, "grad_norm": 0.6290849447250366, "learning_rate": 8.513323983169705e-06, "loss": 0.4991, "step": 607 }, { "epoch": 0.25607187982591606, "grad_norm": 0.5356407761573792, "learning_rate": 8.527349228611502e-06, "loss": 0.5121, "step": 608 }, { "epoch": 0.2564930506808929, "grad_norm": 0.6455509066581726, "learning_rate": 8.541374474053297e-06, "loss": 0.5356, "step": 609 }, { "epoch": 0.25691422153586974, "grad_norm": 0.6789543032646179, "learning_rate": 8.555399719495091e-06, "loss": 0.5521, "step": 610 }, { "epoch": 0.2573353923908466, "grad_norm": 0.5650131702423096, "learning_rate": 8.569424964936886e-06, "loss": 0.4903, "step": 611 }, { "epoch": 0.2577565632458234, "grad_norm": 0.6457734704017639, "learning_rate": 8.583450210378683e-06, "loss": 0.5154, "step": 612 }, { "epoch": 0.25817773410080025, "grad_norm": 0.5774135589599609, "learning_rate": 8.597475455820477e-06, "loss": 0.5167, "step": 613 }, { "epoch": 0.2585989049557771, "grad_norm": 0.52033931016922, "learning_rate": 8.611500701262272e-06, "loss": 0.4922, "step": 614 }, { "epoch": 0.25902007581075387, "grad_norm": 0.6395926475524902, "learning_rate": 8.625525946704067e-06, "loss": 0.495, "step": 615 }, { "epoch": 0.2594412466657307, "grad_norm": 0.6192256212234497, "learning_rate": 8.639551192145864e-06, "loss": 0.4973, "step": 616 }, { "epoch": 0.25986241752070754, "grad_norm": 0.622986376285553, "learning_rate": 8.653576437587658e-06, "loss": 0.5691, "step": 617 }, { "epoch": 0.2602835883756844, "grad_norm": 0.6397612690925598, "learning_rate": 8.667601683029455e-06, "loss": 0.464, "step": 618 }, { "epoch": 0.2607047592306612, "grad_norm": 0.6248738169670105, "learning_rate": 8.681626928471248e-06, "loss": 0.5673, "step": 619 }, { "epoch": 0.26112593008563806, "grad_norm": 0.5467785596847534, "learning_rate": 8.695652173913044e-06, "loss": 0.5251, "step": 620 }, { "epoch": 0.2615471009406149, "grad_norm": 0.6756175756454468, "learning_rate": 8.70967741935484e-06, "loss": 0.5555, "step": 621 }, { "epoch": 0.26196827179559173, "grad_norm": 0.590532124042511, "learning_rate": 8.723702664796636e-06, "loss": 0.5366, "step": 622 }, { "epoch": 0.26238944265056857, "grad_norm": 0.5070919990539551, "learning_rate": 8.737727910238429e-06, "loss": 0.4957, "step": 623 }, { "epoch": 0.2628106135055454, "grad_norm": 0.6344675421714783, "learning_rate": 8.751753155680225e-06, "loss": 0.5213, "step": 624 }, { "epoch": 0.26323178436052225, "grad_norm": 0.5669318437576294, "learning_rate": 8.76577840112202e-06, "loss": 0.5112, "step": 625 }, { "epoch": 0.2636529552154991, "grad_norm": 0.6027851700782776, "learning_rate": 8.779803646563817e-06, "loss": 0.5132, "step": 626 }, { "epoch": 0.2640741260704759, "grad_norm": 0.6136533617973328, "learning_rate": 8.79382889200561e-06, "loss": 0.5277, "step": 627 }, { "epoch": 0.26449529692545276, "grad_norm": 0.541693389415741, "learning_rate": 8.807854137447406e-06, "loss": 0.5261, "step": 628 }, { "epoch": 0.2649164677804296, "grad_norm": 0.5863533020019531, "learning_rate": 8.821879382889201e-06, "loss": 0.5024, "step": 629 }, { "epoch": 0.26533763863540644, "grad_norm": 0.6379204988479614, "learning_rate": 8.835904628330997e-06, "loss": 0.4949, "step": 630 }, { "epoch": 0.2657588094903833, "grad_norm": 0.5446453094482422, "learning_rate": 8.84992987377279e-06, "loss": 0.4872, "step": 631 }, { "epoch": 0.2661799803453601, "grad_norm": 0.5407416224479675, "learning_rate": 8.863955119214587e-06, "loss": 0.4761, "step": 632 }, { "epoch": 0.26660115120033695, "grad_norm": 0.6231432557106018, "learning_rate": 8.877980364656382e-06, "loss": 0.5296, "step": 633 }, { "epoch": 0.2670223220553138, "grad_norm": 0.5571640133857727, "learning_rate": 8.892005610098178e-06, "loss": 0.5165, "step": 634 }, { "epoch": 0.2674434929102906, "grad_norm": 0.547208309173584, "learning_rate": 8.906030855539971e-06, "loss": 0.5578, "step": 635 }, { "epoch": 0.26786466376526746, "grad_norm": 0.7296070456504822, "learning_rate": 8.920056100981768e-06, "loss": 0.5485, "step": 636 }, { "epoch": 0.2682858346202443, "grad_norm": 0.6080478429794312, "learning_rate": 8.934081346423563e-06, "loss": 0.5653, "step": 637 }, { "epoch": 0.26870700547522114, "grad_norm": 0.5737602114677429, "learning_rate": 8.94810659186536e-06, "loss": 0.4949, "step": 638 }, { "epoch": 0.269128176330198, "grad_norm": 0.5694191455841064, "learning_rate": 8.962131837307152e-06, "loss": 0.5581, "step": 639 }, { "epoch": 0.26954934718517476, "grad_norm": 0.5222040414810181, "learning_rate": 8.976157082748949e-06, "loss": 0.4751, "step": 640 }, { "epoch": 0.2699705180401516, "grad_norm": 0.4902222752571106, "learning_rate": 8.990182328190744e-06, "loss": 0.4724, "step": 641 }, { "epoch": 0.27039168889512843, "grad_norm": 0.5687739253044128, "learning_rate": 9.00420757363254e-06, "loss": 0.5337, "step": 642 }, { "epoch": 0.2708128597501053, "grad_norm": 0.6535989046096802, "learning_rate": 9.018232819074335e-06, "loss": 0.5411, "step": 643 }, { "epoch": 0.2712340306050821, "grad_norm": 0.5812592506408691, "learning_rate": 9.03225806451613e-06, "loss": 0.4581, "step": 644 }, { "epoch": 0.27165520146005895, "grad_norm": 0.6178983449935913, "learning_rate": 9.046283309957924e-06, "loss": 0.5366, "step": 645 }, { "epoch": 0.2720763723150358, "grad_norm": 0.5553752779960632, "learning_rate": 9.060308555399721e-06, "loss": 0.5317, "step": 646 }, { "epoch": 0.2724975431700126, "grad_norm": 0.6041153073310852, "learning_rate": 9.074333800841516e-06, "loss": 0.4932, "step": 647 }, { "epoch": 0.27291871402498946, "grad_norm": 0.5749430656433105, "learning_rate": 9.08835904628331e-06, "loss": 0.5047, "step": 648 }, { "epoch": 0.2733398848799663, "grad_norm": 0.6336672902107239, "learning_rate": 9.102384291725105e-06, "loss": 0.5291, "step": 649 }, { "epoch": 0.27376105573494314, "grad_norm": 0.6441599726676941, "learning_rate": 9.116409537166902e-06, "loss": 0.5375, "step": 650 }, { "epoch": 0.27418222658992, "grad_norm": 0.6044341325759888, "learning_rate": 9.130434782608697e-06, "loss": 0.5656, "step": 651 }, { "epoch": 0.2746033974448968, "grad_norm": 0.6183293461799622, "learning_rate": 9.144460028050491e-06, "loss": 0.5526, "step": 652 }, { "epoch": 0.27502456829987365, "grad_norm": 0.5911725163459778, "learning_rate": 9.158485273492286e-06, "loss": 0.5248, "step": 653 }, { "epoch": 0.2754457391548505, "grad_norm": 0.6041329503059387, "learning_rate": 9.172510518934083e-06, "loss": 0.5199, "step": 654 }, { "epoch": 0.2758669100098273, "grad_norm": 0.5691907405853271, "learning_rate": 9.186535764375877e-06, "loss": 0.4972, "step": 655 }, { "epoch": 0.27628808086480416, "grad_norm": 0.5383079051971436, "learning_rate": 9.200561009817672e-06, "loss": 0.5007, "step": 656 }, { "epoch": 0.276709251719781, "grad_norm": 0.5436285734176636, "learning_rate": 9.214586255259467e-06, "loss": 0.4917, "step": 657 }, { "epoch": 0.27713042257475784, "grad_norm": 0.5960427522659302, "learning_rate": 9.228611500701263e-06, "loss": 0.5127, "step": 658 }, { "epoch": 0.2775515934297347, "grad_norm": 0.5489287376403809, "learning_rate": 9.242636746143058e-06, "loss": 0.4953, "step": 659 }, { "epoch": 0.2779727642847115, "grad_norm": 0.6361215710639954, "learning_rate": 9.256661991584853e-06, "loss": 0.522, "step": 660 }, { "epoch": 0.27839393513968835, "grad_norm": 0.6282075047492981, "learning_rate": 9.27068723702665e-06, "loss": 0.5611, "step": 661 }, { "epoch": 0.2788151059946652, "grad_norm": 0.5737236142158508, "learning_rate": 9.284712482468444e-06, "loss": 0.5551, "step": 662 }, { "epoch": 0.27923627684964203, "grad_norm": 0.6089053750038147, "learning_rate": 9.298737727910239e-06, "loss": 0.5157, "step": 663 }, { "epoch": 0.27965744770461887, "grad_norm": 0.5965275764465332, "learning_rate": 9.312762973352034e-06, "loss": 0.4921, "step": 664 }, { "epoch": 0.28007861855959565, "grad_norm": 0.6065980195999146, "learning_rate": 9.32678821879383e-06, "loss": 0.5186, "step": 665 }, { "epoch": 0.2804997894145725, "grad_norm": 0.536060094833374, "learning_rate": 9.340813464235625e-06, "loss": 0.5153, "step": 666 }, { "epoch": 0.2809209602695493, "grad_norm": 0.5557302832603455, "learning_rate": 9.35483870967742e-06, "loss": 0.4898, "step": 667 }, { "epoch": 0.28134213112452616, "grad_norm": 0.5761020183563232, "learning_rate": 9.368863955119215e-06, "loss": 0.5193, "step": 668 }, { "epoch": 0.281763301979503, "grad_norm": 0.5998614430427551, "learning_rate": 9.382889200561011e-06, "loss": 0.5023, "step": 669 }, { "epoch": 0.28218447283447984, "grad_norm": 0.5239495038986206, "learning_rate": 9.396914446002806e-06, "loss": 0.5015, "step": 670 }, { "epoch": 0.2826056436894567, "grad_norm": 0.5475460290908813, "learning_rate": 9.410939691444601e-06, "loss": 0.5121, "step": 671 }, { "epoch": 0.2830268145444335, "grad_norm": 0.6207257509231567, "learning_rate": 9.424964936886396e-06, "loss": 0.533, "step": 672 }, { "epoch": 0.28344798539941035, "grad_norm": 0.5956335067749023, "learning_rate": 9.438990182328192e-06, "loss": 0.4708, "step": 673 }, { "epoch": 0.2838691562543872, "grad_norm": 0.6260802745819092, "learning_rate": 9.453015427769987e-06, "loss": 0.5444, "step": 674 }, { "epoch": 0.284290327109364, "grad_norm": 0.5560049414634705, "learning_rate": 9.467040673211782e-06, "loss": 0.5309, "step": 675 }, { "epoch": 0.28471149796434086, "grad_norm": 0.5324731469154358, "learning_rate": 9.481065918653577e-06, "loss": 0.5342, "step": 676 }, { "epoch": 0.2851326688193177, "grad_norm": 0.6215620636940002, "learning_rate": 9.495091164095373e-06, "loss": 0.5258, "step": 677 }, { "epoch": 0.28555383967429454, "grad_norm": 0.5959873199462891, "learning_rate": 9.509116409537168e-06, "loss": 0.5309, "step": 678 }, { "epoch": 0.2859750105292714, "grad_norm": 0.5840930938720703, "learning_rate": 9.523141654978963e-06, "loss": 0.5287, "step": 679 }, { "epoch": 0.2863961813842482, "grad_norm": 0.5964308381080627, "learning_rate": 9.537166900420757e-06, "loss": 0.4969, "step": 680 }, { "epoch": 0.28681735223922505, "grad_norm": 0.5806483030319214, "learning_rate": 9.551192145862554e-06, "loss": 0.5065, "step": 681 }, { "epoch": 0.2872385230942019, "grad_norm": 0.5876980423927307, "learning_rate": 9.565217391304349e-06, "loss": 0.5045, "step": 682 }, { "epoch": 0.28765969394917873, "grad_norm": 0.7002311944961548, "learning_rate": 9.579242636746143e-06, "loss": 0.5386, "step": 683 }, { "epoch": 0.28808086480415557, "grad_norm": 0.6927395462989807, "learning_rate": 9.593267882187938e-06, "loss": 0.5515, "step": 684 }, { "epoch": 0.2885020356591324, "grad_norm": 0.5937376618385315, "learning_rate": 9.607293127629735e-06, "loss": 0.5244, "step": 685 }, { "epoch": 0.28892320651410924, "grad_norm": 0.6028384566307068, "learning_rate": 9.62131837307153e-06, "loss": 0.4846, "step": 686 }, { "epoch": 0.2893443773690861, "grad_norm": 0.6996963620185852, "learning_rate": 9.635343618513324e-06, "loss": 0.5736, "step": 687 }, { "epoch": 0.2897655482240629, "grad_norm": 0.6147079467773438, "learning_rate": 9.649368863955119e-06, "loss": 0.5194, "step": 688 }, { "epoch": 0.29018671907903976, "grad_norm": 0.5769491791725159, "learning_rate": 9.663394109396916e-06, "loss": 0.5092, "step": 689 }, { "epoch": 0.29060788993401654, "grad_norm": 0.5624260902404785, "learning_rate": 9.67741935483871e-06, "loss": 0.4952, "step": 690 }, { "epoch": 0.2910290607889934, "grad_norm": 0.5560654997825623, "learning_rate": 9.691444600280505e-06, "loss": 0.5242, "step": 691 }, { "epoch": 0.2914502316439702, "grad_norm": 0.5922946929931641, "learning_rate": 9.7054698457223e-06, "loss": 0.5107, "step": 692 }, { "epoch": 0.29187140249894705, "grad_norm": 0.5918301343917847, "learning_rate": 9.719495091164096e-06, "loss": 0.532, "step": 693 }, { "epoch": 0.2922925733539239, "grad_norm": 0.5240029096603394, "learning_rate": 9.733520336605891e-06, "loss": 0.5311, "step": 694 }, { "epoch": 0.29271374420890073, "grad_norm": 0.6384739875793457, "learning_rate": 9.747545582047686e-06, "loss": 0.5166, "step": 695 }, { "epoch": 0.29313491506387757, "grad_norm": 0.6077389717102051, "learning_rate": 9.761570827489481e-06, "loss": 0.5261, "step": 696 }, { "epoch": 0.2935560859188544, "grad_norm": 0.6170197129249573, "learning_rate": 9.775596072931277e-06, "loss": 0.5318, "step": 697 }, { "epoch": 0.29397725677383124, "grad_norm": 0.5926690101623535, "learning_rate": 9.789621318373072e-06, "loss": 0.5324, "step": 698 }, { "epoch": 0.2943984276288081, "grad_norm": 0.5384149551391602, "learning_rate": 9.803646563814867e-06, "loss": 0.5165, "step": 699 }, { "epoch": 0.2948195984837849, "grad_norm": 0.585185170173645, "learning_rate": 9.817671809256662e-06, "loss": 0.5265, "step": 700 }, { "epoch": 0.29524076933876175, "grad_norm": 0.6038224697113037, "learning_rate": 9.831697054698458e-06, "loss": 0.5434, "step": 701 }, { "epoch": 0.2956619401937386, "grad_norm": 0.6117435097694397, "learning_rate": 9.845722300140253e-06, "loss": 0.5363, "step": 702 }, { "epoch": 0.29608311104871543, "grad_norm": 0.5888546109199524, "learning_rate": 9.859747545582048e-06, "loss": 0.5222, "step": 703 }, { "epoch": 0.29650428190369227, "grad_norm": 0.5402876138687134, "learning_rate": 9.873772791023844e-06, "loss": 0.5084, "step": 704 }, { "epoch": 0.2969254527586691, "grad_norm": 0.6316883563995361, "learning_rate": 9.887798036465639e-06, "loss": 0.566, "step": 705 }, { "epoch": 0.29734662361364594, "grad_norm": 0.5665240287780762, "learning_rate": 9.901823281907434e-06, "loss": 0.4914, "step": 706 }, { "epoch": 0.2977677944686228, "grad_norm": 0.5718700289726257, "learning_rate": 9.915848527349229e-06, "loss": 0.5417, "step": 707 }, { "epoch": 0.2981889653235996, "grad_norm": 0.5676663517951965, "learning_rate": 9.929873772791025e-06, "loss": 0.4776, "step": 708 }, { "epoch": 0.29861013617857646, "grad_norm": 0.6133127808570862, "learning_rate": 9.94389901823282e-06, "loss": 0.5364, "step": 709 }, { "epoch": 0.2990313070335533, "grad_norm": 0.6922508478164673, "learning_rate": 9.957924263674615e-06, "loss": 0.5394, "step": 710 }, { "epoch": 0.29945247788853013, "grad_norm": 0.5675974488258362, "learning_rate": 9.97194950911641e-06, "loss": 0.5143, "step": 711 }, { "epoch": 0.29987364874350697, "grad_norm": 0.6300660371780396, "learning_rate": 9.985974754558206e-06, "loss": 0.5155, "step": 712 }, { "epoch": 0.3002948195984838, "grad_norm": 0.6917495727539062, "learning_rate": 1e-05, "loss": 0.5248, "step": 713 }, { "epoch": 0.30071599045346065, "grad_norm": 0.6937676072120667, "learning_rate": 9.999999399297828e-06, "loss": 0.5286, "step": 714 }, { "epoch": 0.30113716130843743, "grad_norm": 0.6279090642929077, "learning_rate": 9.999997597191458e-06, "loss": 0.5577, "step": 715 }, { "epoch": 0.30155833216341427, "grad_norm": 0.5705698728561401, "learning_rate": 9.99999459368132e-06, "loss": 0.4858, "step": 716 }, { "epoch": 0.3019795030183911, "grad_norm": 0.6699649691581726, "learning_rate": 9.999990388768136e-06, "loss": 0.4934, "step": 717 }, { "epoch": 0.30240067387336794, "grad_norm": 0.5476706027984619, "learning_rate": 9.99998498245292e-06, "loss": 0.4935, "step": 718 }, { "epoch": 0.3028218447283448, "grad_norm": 0.6664700508117676, "learning_rate": 9.999978374736967e-06, "loss": 0.5124, "step": 719 }, { "epoch": 0.3032430155833216, "grad_norm": 0.5980981588363647, "learning_rate": 9.999970565621867e-06, "loss": 0.4921, "step": 720 }, { "epoch": 0.30366418643829846, "grad_norm": 0.598310649394989, "learning_rate": 9.999961555109497e-06, "loss": 0.5296, "step": 721 }, { "epoch": 0.3040853572932753, "grad_norm": 0.6815471649169922, "learning_rate": 9.999951343202018e-06, "loss": 0.5516, "step": 722 }, { "epoch": 0.30450652814825213, "grad_norm": 0.605233371257782, "learning_rate": 9.999939929901888e-06, "loss": 0.5249, "step": 723 }, { "epoch": 0.30492769900322897, "grad_norm": 0.548182487487793, "learning_rate": 9.999927315211847e-06, "loss": 0.4795, "step": 724 }, { "epoch": 0.3053488698582058, "grad_norm": 0.5159181952476501, "learning_rate": 9.999913499134928e-06, "loss": 0.4976, "step": 725 }, { "epoch": 0.30577004071318264, "grad_norm": 0.6072083115577698, "learning_rate": 9.999898481674449e-06, "loss": 0.4981, "step": 726 }, { "epoch": 0.3061912115681595, "grad_norm": 0.5524949431419373, "learning_rate": 9.99988226283402e-06, "loss": 0.5346, "step": 727 }, { "epoch": 0.3066123824231363, "grad_norm": 0.5780781507492065, "learning_rate": 9.999864842617534e-06, "loss": 0.5181, "step": 728 }, { "epoch": 0.30703355327811316, "grad_norm": 0.5854024291038513, "learning_rate": 9.999846221029183e-06, "loss": 0.5463, "step": 729 }, { "epoch": 0.30745472413309, "grad_norm": 0.5953801274299622, "learning_rate": 9.999826398073438e-06, "loss": 0.5077, "step": 730 }, { "epoch": 0.30787589498806683, "grad_norm": 0.585114598274231, "learning_rate": 9.999805373755062e-06, "loss": 0.539, "step": 731 }, { "epoch": 0.30829706584304367, "grad_norm": 0.5802684426307678, "learning_rate": 9.999783148079107e-06, "loss": 0.5262, "step": 732 }, { "epoch": 0.3087182366980205, "grad_norm": 0.5838485956192017, "learning_rate": 9.999759721050915e-06, "loss": 0.5254, "step": 733 }, { "epoch": 0.30913940755299735, "grad_norm": 0.5368148684501648, "learning_rate": 9.999735092676112e-06, "loss": 0.4758, "step": 734 }, { "epoch": 0.3095605784079742, "grad_norm": 0.5879541635513306, "learning_rate": 9.999709262960618e-06, "loss": 0.5132, "step": 735 }, { "epoch": 0.309981749262951, "grad_norm": 0.5161323547363281, "learning_rate": 9.999682231910637e-06, "loss": 0.5226, "step": 736 }, { "epoch": 0.31040292011792786, "grad_norm": 0.5589486360549927, "learning_rate": 9.999653999532668e-06, "loss": 0.5014, "step": 737 }, { "epoch": 0.3108240909729047, "grad_norm": 0.5066612362861633, "learning_rate": 9.999624565833491e-06, "loss": 0.4925, "step": 738 }, { "epoch": 0.31124526182788154, "grad_norm": 0.6021533608436584, "learning_rate": 9.999593930820182e-06, "loss": 0.5258, "step": 739 }, { "epoch": 0.3116664326828583, "grad_norm": 0.6140767335891724, "learning_rate": 9.999562094500097e-06, "loss": 0.5052, "step": 740 }, { "epoch": 0.31208760353783516, "grad_norm": 0.5514461994171143, "learning_rate": 9.99952905688089e-06, "loss": 0.4996, "step": 741 }, { "epoch": 0.312508774392812, "grad_norm": 0.5531951189041138, "learning_rate": 9.999494817970498e-06, "loss": 0.5294, "step": 742 }, { "epoch": 0.31292994524778883, "grad_norm": 0.5904696583747864, "learning_rate": 9.999459377777148e-06, "loss": 0.4886, "step": 743 }, { "epoch": 0.31335111610276567, "grad_norm": 0.4670141935348511, "learning_rate": 9.999422736309354e-06, "loss": 0.4592, "step": 744 }, { "epoch": 0.3137722869577425, "grad_norm": 0.5748916268348694, "learning_rate": 9.999384893575921e-06, "loss": 0.5187, "step": 745 }, { "epoch": 0.31419345781271935, "grad_norm": 0.569791316986084, "learning_rate": 9.999345849585944e-06, "loss": 0.5273, "step": 746 }, { "epoch": 0.3146146286676962, "grad_norm": 0.497379869222641, "learning_rate": 9.999305604348804e-06, "loss": 0.4888, "step": 747 }, { "epoch": 0.315035799522673, "grad_norm": 0.5711960792541504, "learning_rate": 9.999264157874169e-06, "loss": 0.4837, "step": 748 }, { "epoch": 0.31545697037764986, "grad_norm": 0.5237051248550415, "learning_rate": 9.999221510171998e-06, "loss": 0.5491, "step": 749 }, { "epoch": 0.3158781412326267, "grad_norm": 0.5403212308883667, "learning_rate": 9.99917766125254e-06, "loss": 0.5268, "step": 750 }, { "epoch": 0.31629931208760353, "grad_norm": 0.5178930163383484, "learning_rate": 9.999132611126331e-06, "loss": 0.496, "step": 751 }, { "epoch": 0.3167204829425804, "grad_norm": 0.5580310225486755, "learning_rate": 9.999086359804196e-06, "loss": 0.4733, "step": 752 }, { "epoch": 0.3171416537975572, "grad_norm": 0.4561931788921356, "learning_rate": 9.999038907297245e-06, "loss": 0.4838, "step": 753 }, { "epoch": 0.31756282465253405, "grad_norm": 0.564038872718811, "learning_rate": 9.998990253616884e-06, "loss": 0.4905, "step": 754 }, { "epoch": 0.3179839955075109, "grad_norm": 0.5234927535057068, "learning_rate": 9.998940398774802e-06, "loss": 0.5265, "step": 755 }, { "epoch": 0.3184051663624877, "grad_norm": 0.5318629145622253, "learning_rate": 9.998889342782977e-06, "loss": 0.5296, "step": 756 }, { "epoch": 0.31882633721746456, "grad_norm": 0.5161673426628113, "learning_rate": 9.998837085653679e-06, "loss": 0.5055, "step": 757 }, { "epoch": 0.3192475080724414, "grad_norm": 0.5141353607177734, "learning_rate": 9.998783627399462e-06, "loss": 0.4921, "step": 758 }, { "epoch": 0.31966867892741824, "grad_norm": 0.5502719283103943, "learning_rate": 9.998728968033174e-06, "loss": 0.5029, "step": 759 }, { "epoch": 0.3200898497823951, "grad_norm": 0.6672386527061462, "learning_rate": 9.998673107567944e-06, "loss": 0.5569, "step": 760 }, { "epoch": 0.3205110206373719, "grad_norm": 0.4996210038661957, "learning_rate": 9.9986160460172e-06, "loss": 0.5101, "step": 761 }, { "epoch": 0.32093219149234875, "grad_norm": 0.6489299535751343, "learning_rate": 9.998557783394647e-06, "loss": 0.5266, "step": 762 }, { "epoch": 0.3213533623473256, "grad_norm": 0.5297893285751343, "learning_rate": 9.998498319714289e-06, "loss": 0.5205, "step": 763 }, { "epoch": 0.3217745332023024, "grad_norm": 0.5761034488677979, "learning_rate": 9.998437654990412e-06, "loss": 0.5427, "step": 764 }, { "epoch": 0.3221957040572792, "grad_norm": 0.49307605624198914, "learning_rate": 9.998375789237593e-06, "loss": 0.4611, "step": 765 }, { "epoch": 0.32261687491225605, "grad_norm": 0.5309070944786072, "learning_rate": 9.998312722470698e-06, "loss": 0.4846, "step": 766 }, { "epoch": 0.3230380457672329, "grad_norm": 0.5600467324256897, "learning_rate": 9.998248454704877e-06, "loss": 0.4805, "step": 767 }, { "epoch": 0.3234592166222097, "grad_norm": 0.508985698223114, "learning_rate": 9.998182985955578e-06, "loss": 0.5089, "step": 768 }, { "epoch": 0.32388038747718656, "grad_norm": 0.550682544708252, "learning_rate": 9.998116316238527e-06, "loss": 0.4959, "step": 769 }, { "epoch": 0.3243015583321634, "grad_norm": 0.4994562566280365, "learning_rate": 9.998048445569745e-06, "loss": 0.5115, "step": 770 }, { "epoch": 0.32472272918714024, "grad_norm": 0.521800696849823, "learning_rate": 9.997979373965542e-06, "loss": 0.4929, "step": 771 }, { "epoch": 0.3251439000421171, "grad_norm": 0.5296176075935364, "learning_rate": 9.997909101442512e-06, "loss": 0.4976, "step": 772 }, { "epoch": 0.3255650708970939, "grad_norm": 0.5812305808067322, "learning_rate": 9.997837628017541e-06, "loss": 0.4985, "step": 773 }, { "epoch": 0.32598624175207075, "grad_norm": 0.549400806427002, "learning_rate": 9.997764953707803e-06, "loss": 0.4806, "step": 774 }, { "epoch": 0.3264074126070476, "grad_norm": 0.5452905893325806, "learning_rate": 9.99769107853076e-06, "loss": 0.5001, "step": 775 }, { "epoch": 0.3268285834620244, "grad_norm": 0.5545766949653625, "learning_rate": 9.997616002504163e-06, "loss": 0.5046, "step": 776 }, { "epoch": 0.32724975431700126, "grad_norm": 0.574593186378479, "learning_rate": 9.99753972564605e-06, "loss": 0.4919, "step": 777 }, { "epoch": 0.3276709251719781, "grad_norm": 0.5971167087554932, "learning_rate": 9.997462247974751e-06, "loss": 0.4947, "step": 778 }, { "epoch": 0.32809209602695494, "grad_norm": 0.5062238574028015, "learning_rate": 9.997383569508882e-06, "loss": 0.4931, "step": 779 }, { "epoch": 0.3285132668819318, "grad_norm": 0.6229451298713684, "learning_rate": 9.997303690267345e-06, "loss": 0.5142, "step": 780 }, { "epoch": 0.3289344377369086, "grad_norm": 0.654155969619751, "learning_rate": 9.997222610269339e-06, "loss": 0.5277, "step": 781 }, { "epoch": 0.32935560859188545, "grad_norm": 0.543705940246582, "learning_rate": 9.997140329534342e-06, "loss": 0.4769, "step": 782 }, { "epoch": 0.3297767794468623, "grad_norm": 0.5392808318138123, "learning_rate": 9.997056848082125e-06, "loss": 0.4919, "step": 783 }, { "epoch": 0.3301979503018391, "grad_norm": 0.5126375555992126, "learning_rate": 9.996972165932746e-06, "loss": 0.515, "step": 784 }, { "epoch": 0.33061912115681596, "grad_norm": 0.6471444964408875, "learning_rate": 9.996886283106555e-06, "loss": 0.516, "step": 785 }, { "epoch": 0.3310402920117928, "grad_norm": 0.6420009136199951, "learning_rate": 9.996799199624186e-06, "loss": 0.5151, "step": 786 }, { "epoch": 0.33146146286676964, "grad_norm": 0.582186758518219, "learning_rate": 9.996710915506565e-06, "loss": 0.4923, "step": 787 }, { "epoch": 0.3318826337217465, "grad_norm": 0.5256351828575134, "learning_rate": 9.996621430774905e-06, "loss": 0.4728, "step": 788 }, { "epoch": 0.3323038045767233, "grad_norm": 0.6063500642776489, "learning_rate": 9.996530745450705e-06, "loss": 0.5033, "step": 789 }, { "epoch": 0.33272497543170015, "grad_norm": 0.603878915309906, "learning_rate": 9.996438859555757e-06, "loss": 0.496, "step": 790 }, { "epoch": 0.33314614628667694, "grad_norm": 0.5650388598442078, "learning_rate": 9.996345773112139e-06, "loss": 0.5265, "step": 791 }, { "epoch": 0.3335673171416538, "grad_norm": 0.5612038969993591, "learning_rate": 9.99625148614222e-06, "loss": 0.5128, "step": 792 }, { "epoch": 0.3339884879966306, "grad_norm": 0.527466893196106, "learning_rate": 9.996155998668651e-06, "loss": 0.4781, "step": 793 }, { "epoch": 0.33440965885160745, "grad_norm": 0.6449896097183228, "learning_rate": 9.996059310714379e-06, "loss": 0.4641, "step": 794 }, { "epoch": 0.3348308297065843, "grad_norm": 0.5491938591003418, "learning_rate": 9.995961422302635e-06, "loss": 0.5131, "step": 795 }, { "epoch": 0.3352520005615611, "grad_norm": 0.5340595245361328, "learning_rate": 9.99586233345694e-06, "loss": 0.5162, "step": 796 }, { "epoch": 0.33567317141653796, "grad_norm": 0.5966382026672363, "learning_rate": 9.995762044201104e-06, "loss": 0.5012, "step": 797 }, { "epoch": 0.3360943422715148, "grad_norm": 0.5507976412773132, "learning_rate": 9.995660554559225e-06, "loss": 0.5109, "step": 798 }, { "epoch": 0.33651551312649164, "grad_norm": 0.5535058975219727, "learning_rate": 9.995557864555686e-06, "loss": 0.5195, "step": 799 }, { "epoch": 0.3369366839814685, "grad_norm": 0.6190313100814819, "learning_rate": 9.995453974215164e-06, "loss": 0.5363, "step": 800 }, { "epoch": 0.3373578548364453, "grad_norm": 0.5332507491111755, "learning_rate": 9.99534888356262e-06, "loss": 0.4985, "step": 801 }, { "epoch": 0.33777902569142215, "grad_norm": 0.6167807579040527, "learning_rate": 9.995242592623309e-06, "loss": 0.5451, "step": 802 }, { "epoch": 0.338200196546399, "grad_norm": 0.6138001084327698, "learning_rate": 9.995135101422766e-06, "loss": 0.5644, "step": 803 }, { "epoch": 0.33862136740137583, "grad_norm": 0.5943983793258667, "learning_rate": 9.995026409986823e-06, "loss": 0.5008, "step": 804 }, { "epoch": 0.33904253825635267, "grad_norm": 0.5362452864646912, "learning_rate": 9.994916518341594e-06, "loss": 0.5079, "step": 805 }, { "epoch": 0.3394637091113295, "grad_norm": 0.5298764705657959, "learning_rate": 9.994805426513483e-06, "loss": 0.459, "step": 806 }, { "epoch": 0.33988487996630634, "grad_norm": 0.568414568901062, "learning_rate": 9.994693134529185e-06, "loss": 0.4926, "step": 807 }, { "epoch": 0.3403060508212832, "grad_norm": 0.541574239730835, "learning_rate": 9.994579642415683e-06, "loss": 0.5407, "step": 808 }, { "epoch": 0.34072722167626, "grad_norm": 0.5637990832328796, "learning_rate": 9.994464950200244e-06, "loss": 0.5701, "step": 809 }, { "epoch": 0.34114839253123685, "grad_norm": 0.5483258366584778, "learning_rate": 9.994349057910429e-06, "loss": 0.5371, "step": 810 }, { "epoch": 0.3415695633862137, "grad_norm": 0.6153898239135742, "learning_rate": 9.994231965574083e-06, "loss": 0.5284, "step": 811 }, { "epoch": 0.34199073424119053, "grad_norm": 0.5015638470649719, "learning_rate": 9.994113673219342e-06, "loss": 0.4915, "step": 812 }, { "epoch": 0.34241190509616737, "grad_norm": 0.589610755443573, "learning_rate": 9.99399418087463e-06, "loss": 0.5305, "step": 813 }, { "epoch": 0.3428330759511442, "grad_norm": 0.626335620880127, "learning_rate": 9.993873488568655e-06, "loss": 0.5697, "step": 814 }, { "epoch": 0.34325424680612104, "grad_norm": 0.5336253643035889, "learning_rate": 9.993751596330421e-06, "loss": 0.458, "step": 815 }, { "epoch": 0.3436754176610978, "grad_norm": 0.5426372289657593, "learning_rate": 9.993628504189215e-06, "loss": 0.5079, "step": 816 }, { "epoch": 0.34409658851607466, "grad_norm": 0.5818110108375549, "learning_rate": 9.993504212174612e-06, "loss": 0.4818, "step": 817 }, { "epoch": 0.3445177593710515, "grad_norm": 0.5586771965026855, "learning_rate": 9.99337872031648e-06, "loss": 0.5133, "step": 818 }, { "epoch": 0.34493893022602834, "grad_norm": 0.5641528964042664, "learning_rate": 9.993252028644972e-06, "loss": 0.502, "step": 819 }, { "epoch": 0.3453601010810052, "grad_norm": 0.5722053647041321, "learning_rate": 9.993124137190528e-06, "loss": 0.547, "step": 820 }, { "epoch": 0.345781271935982, "grad_norm": 0.5721474289894104, "learning_rate": 9.992995045983877e-06, "loss": 0.5123, "step": 821 }, { "epoch": 0.34620244279095885, "grad_norm": 0.5321559906005859, "learning_rate": 9.99286475505604e-06, "loss": 0.4922, "step": 822 }, { "epoch": 0.3466236136459357, "grad_norm": 0.5424789190292358, "learning_rate": 9.992733264438323e-06, "loss": 0.5096, "step": 823 }, { "epoch": 0.34704478450091253, "grad_norm": 0.5504637956619263, "learning_rate": 9.992600574162318e-06, "loss": 0.4847, "step": 824 }, { "epoch": 0.34746595535588937, "grad_norm": 0.5167056322097778, "learning_rate": 9.99246668425991e-06, "loss": 0.4802, "step": 825 }, { "epoch": 0.3478871262108662, "grad_norm": 0.5491632223129272, "learning_rate": 9.99233159476327e-06, "loss": 0.5247, "step": 826 }, { "epoch": 0.34830829706584304, "grad_norm": 0.49619030952453613, "learning_rate": 9.992195305704857e-06, "loss": 0.4616, "step": 827 }, { "epoch": 0.3487294679208199, "grad_norm": 0.5032910704612732, "learning_rate": 9.99205781711742e-06, "loss": 0.5144, "step": 828 }, { "epoch": 0.3491506387757967, "grad_norm": 0.6230080127716064, "learning_rate": 9.991919129033994e-06, "loss": 0.4848, "step": 829 }, { "epoch": 0.34957180963077356, "grad_norm": 0.5377132892608643, "learning_rate": 9.9917792414879e-06, "loss": 0.5162, "step": 830 }, { "epoch": 0.3499929804857504, "grad_norm": 0.557381272315979, "learning_rate": 9.991638154512755e-06, "loss": 0.4997, "step": 831 }, { "epoch": 0.35041415134072723, "grad_norm": 0.5852077007293701, "learning_rate": 9.991495868142457e-06, "loss": 0.4796, "step": 832 }, { "epoch": 0.35083532219570407, "grad_norm": 0.5443984866142273, "learning_rate": 9.991352382411197e-06, "loss": 0.521, "step": 833 }, { "epoch": 0.3512564930506809, "grad_norm": 0.5508362054824829, "learning_rate": 9.99120769735345e-06, "loss": 0.5218, "step": 834 }, { "epoch": 0.35167766390565774, "grad_norm": 0.5761767625808716, "learning_rate": 9.99106181300398e-06, "loss": 0.4786, "step": 835 }, { "epoch": 0.3520988347606346, "grad_norm": 0.5098820328712463, "learning_rate": 9.990914729397843e-06, "loss": 0.4677, "step": 836 }, { "epoch": 0.3525200056156114, "grad_norm": 0.47071194648742676, "learning_rate": 9.990766446570377e-06, "loss": 0.4685, "step": 837 }, { "epoch": 0.35294117647058826, "grad_norm": 0.5311869382858276, "learning_rate": 9.990616964557215e-06, "loss": 0.4645, "step": 838 }, { "epoch": 0.3533623473255651, "grad_norm": 0.5771487951278687, "learning_rate": 9.990466283394272e-06, "loss": 0.5527, "step": 839 }, { "epoch": 0.35378351818054193, "grad_norm": 0.5532116293907166, "learning_rate": 9.990314403117755e-06, "loss": 0.5155, "step": 840 }, { "epoch": 0.3542046890355187, "grad_norm": 0.5439020395278931, "learning_rate": 9.990161323764158e-06, "loss": 0.4717, "step": 841 }, { "epoch": 0.35462585989049555, "grad_norm": 0.5644179582595825, "learning_rate": 9.990007045370263e-06, "loss": 0.5079, "step": 842 }, { "epoch": 0.3550470307454724, "grad_norm": 0.5493528246879578, "learning_rate": 9.98985156797314e-06, "loss": 0.4985, "step": 843 }, { "epoch": 0.35546820160044923, "grad_norm": 0.558459997177124, "learning_rate": 9.989694891610147e-06, "loss": 0.4701, "step": 844 }, { "epoch": 0.35588937245542607, "grad_norm": 0.5450106859207153, "learning_rate": 9.989537016318931e-06, "loss": 0.4943, "step": 845 }, { "epoch": 0.3563105433104029, "grad_norm": 0.5248389840126038, "learning_rate": 9.989377942137425e-06, "loss": 0.5084, "step": 846 }, { "epoch": 0.35673171416537974, "grad_norm": 0.5253742933273315, "learning_rate": 9.989217669103855e-06, "loss": 0.51, "step": 847 }, { "epoch": 0.3571528850203566, "grad_norm": 0.5371946096420288, "learning_rate": 9.989056197256726e-06, "loss": 0.5002, "step": 848 }, { "epoch": 0.3575740558753334, "grad_norm": 0.5471078157424927, "learning_rate": 9.98889352663484e-06, "loss": 0.4978, "step": 849 }, { "epoch": 0.35799522673031026, "grad_norm": 0.47979825735092163, "learning_rate": 9.988729657277285e-06, "loss": 0.5061, "step": 850 }, { "epoch": 0.3584163975852871, "grad_norm": 0.5037662982940674, "learning_rate": 9.988564589223433e-06, "loss": 0.4997, "step": 851 }, { "epoch": 0.35883756844026393, "grad_norm": 0.5589022040367126, "learning_rate": 9.988398322512947e-06, "loss": 0.4994, "step": 852 }, { "epoch": 0.35925873929524077, "grad_norm": 0.519187331199646, "learning_rate": 9.98823085718578e-06, "loss": 0.4664, "step": 853 }, { "epoch": 0.3596799101502176, "grad_norm": 0.5366926193237305, "learning_rate": 9.988062193282166e-06, "loss": 0.4822, "step": 854 }, { "epoch": 0.36010108100519445, "grad_norm": 0.46351170539855957, "learning_rate": 9.987892330842637e-06, "loss": 0.4943, "step": 855 }, { "epoch": 0.3605222518601713, "grad_norm": 0.5556720495223999, "learning_rate": 9.987721269908006e-06, "loss": 0.4957, "step": 856 }, { "epoch": 0.3609434227151481, "grad_norm": 0.5593733787536621, "learning_rate": 9.987549010519375e-06, "loss": 0.5251, "step": 857 }, { "epoch": 0.36136459357012496, "grad_norm": 0.48286935687065125, "learning_rate": 9.987375552718133e-06, "loss": 0.5339, "step": 858 }, { "epoch": 0.3617857644251018, "grad_norm": 0.518423855304718, "learning_rate": 9.987200896545962e-06, "loss": 0.4952, "step": 859 }, { "epoch": 0.36220693528007863, "grad_norm": 0.534591555595398, "learning_rate": 9.987025042044826e-06, "loss": 0.51, "step": 860 }, { "epoch": 0.3626281061350555, "grad_norm": 0.5550995469093323, "learning_rate": 9.98684798925698e-06, "loss": 0.5188, "step": 861 }, { "epoch": 0.3630492769900323, "grad_norm": 0.5077584385871887, "learning_rate": 9.986669738224967e-06, "loss": 0.5189, "step": 862 }, { "epoch": 0.36347044784500915, "grad_norm": 0.550168514251709, "learning_rate": 9.986490288991617e-06, "loss": 0.5124, "step": 863 }, { "epoch": 0.363891618699986, "grad_norm": 0.6039422750473022, "learning_rate": 9.98630964160005e-06, "loss": 0.5592, "step": 864 }, { "epoch": 0.3643127895549628, "grad_norm": 0.6406044363975525, "learning_rate": 9.986127796093668e-06, "loss": 0.5369, "step": 865 }, { "epoch": 0.3647339604099396, "grad_norm": 0.5474851131439209, "learning_rate": 9.985944752516168e-06, "loss": 0.5021, "step": 866 }, { "epoch": 0.36515513126491644, "grad_norm": 0.5576897859573364, "learning_rate": 9.985760510911531e-06, "loss": 0.4878, "step": 867 }, { "epoch": 0.3655763021198933, "grad_norm": 0.4689224362373352, "learning_rate": 9.985575071324028e-06, "loss": 0.4578, "step": 868 }, { "epoch": 0.3659974729748701, "grad_norm": 0.6127997636795044, "learning_rate": 9.985388433798215e-06, "loss": 0.5131, "step": 869 }, { "epoch": 0.36641864382984696, "grad_norm": 0.5331124663352966, "learning_rate": 9.985200598378938e-06, "loss": 0.5191, "step": 870 }, { "epoch": 0.3668398146848238, "grad_norm": 0.566594660282135, "learning_rate": 9.985011565111331e-06, "loss": 0.5154, "step": 871 }, { "epoch": 0.36726098553980063, "grad_norm": 0.5333290696144104, "learning_rate": 9.984821334040814e-06, "loss": 0.5366, "step": 872 }, { "epoch": 0.36768215639477747, "grad_norm": 0.5496194958686829, "learning_rate": 9.984629905213096e-06, "loss": 0.469, "step": 873 }, { "epoch": 0.3681033272497543, "grad_norm": 0.5434479713439941, "learning_rate": 9.984437278674173e-06, "loss": 0.519, "step": 874 }, { "epoch": 0.36852449810473115, "grad_norm": 0.5367580652236938, "learning_rate": 9.984243454470333e-06, "loss": 0.4975, "step": 875 }, { "epoch": 0.368945668959708, "grad_norm": 0.534529447555542, "learning_rate": 9.984048432648142e-06, "loss": 0.5014, "step": 876 }, { "epoch": 0.3693668398146848, "grad_norm": 0.5293595790863037, "learning_rate": 9.983852213254466e-06, "loss": 0.497, "step": 877 }, { "epoch": 0.36978801066966166, "grad_norm": 0.5725763440132141, "learning_rate": 9.98365479633645e-06, "loss": 0.4861, "step": 878 }, { "epoch": 0.3702091815246385, "grad_norm": 0.5591570734977722, "learning_rate": 9.98345618194153e-06, "loss": 0.5097, "step": 879 }, { "epoch": 0.37063035237961534, "grad_norm": 0.5118606686592102, "learning_rate": 9.983256370117429e-06, "loss": 0.4614, "step": 880 }, { "epoch": 0.3710515232345922, "grad_norm": 0.5541980266571045, "learning_rate": 9.983055360912158e-06, "loss": 0.5089, "step": 881 }, { "epoch": 0.371472694089569, "grad_norm": 0.5364789366722107, "learning_rate": 9.982853154374015e-06, "loss": 0.4969, "step": 882 }, { "epoch": 0.37189386494454585, "grad_norm": 0.5508146286010742, "learning_rate": 9.982649750551589e-06, "loss": 0.5347, "step": 883 }, { "epoch": 0.3723150357995227, "grad_norm": 0.5321168303489685, "learning_rate": 9.98244514949375e-06, "loss": 0.5465, "step": 884 }, { "epoch": 0.3727362066544995, "grad_norm": 0.5358721613883972, "learning_rate": 9.982239351249663e-06, "loss": 0.5188, "step": 885 }, { "epoch": 0.37315737750947636, "grad_norm": 0.5896248817443848, "learning_rate": 9.982032355868776e-06, "loss": 0.5214, "step": 886 }, { "epoch": 0.3735785483644532, "grad_norm": 0.5385740399360657, "learning_rate": 9.981824163400827e-06, "loss": 0.4929, "step": 887 }, { "epoch": 0.37399971921943004, "grad_norm": 0.4953187108039856, "learning_rate": 9.98161477389584e-06, "loss": 0.4704, "step": 888 }, { "epoch": 0.3744208900744069, "grad_norm": 0.5206578373908997, "learning_rate": 9.981404187404127e-06, "loss": 0.5133, "step": 889 }, { "epoch": 0.3748420609293837, "grad_norm": 0.4873134195804596, "learning_rate": 9.981192403976288e-06, "loss": 0.4633, "step": 890 }, { "epoch": 0.3752632317843605, "grad_norm": 0.5719467401504517, "learning_rate": 9.980979423663211e-06, "loss": 0.526, "step": 891 }, { "epoch": 0.37568440263933733, "grad_norm": 0.5105690956115723, "learning_rate": 9.98076524651607e-06, "loss": 0.4824, "step": 892 }, { "epoch": 0.37610557349431417, "grad_norm": 0.4913564920425415, "learning_rate": 9.98054987258633e-06, "loss": 0.5224, "step": 893 }, { "epoch": 0.376526744349291, "grad_norm": 0.5625660419464111, "learning_rate": 9.980333301925738e-06, "loss": 0.5193, "step": 894 }, { "epoch": 0.37694791520426785, "grad_norm": 0.5290790796279907, "learning_rate": 9.980115534586334e-06, "loss": 0.5198, "step": 895 }, { "epoch": 0.3773690860592447, "grad_norm": 0.5487920641899109, "learning_rate": 9.979896570620441e-06, "loss": 0.4935, "step": 896 }, { "epoch": 0.3777902569142215, "grad_norm": 0.6343806982040405, "learning_rate": 9.979676410080676e-06, "loss": 0.548, "step": 897 }, { "epoch": 0.37821142776919836, "grad_norm": 0.5102066993713379, "learning_rate": 9.979455053019938e-06, "loss": 0.5028, "step": 898 }, { "epoch": 0.3786325986241752, "grad_norm": 0.5345166921615601, "learning_rate": 9.97923249949141e-06, "loss": 0.487, "step": 899 }, { "epoch": 0.37905376947915204, "grad_norm": 0.6371459364891052, "learning_rate": 9.979008749548575e-06, "loss": 0.5096, "step": 900 }, { "epoch": 0.3794749403341289, "grad_norm": 0.5323603749275208, "learning_rate": 9.978783803245191e-06, "loss": 0.4769, "step": 901 }, { "epoch": 0.3798961111891057, "grad_norm": 0.5666497349739075, "learning_rate": 9.97855766063531e-06, "loss": 0.5034, "step": 902 }, { "epoch": 0.38031728204408255, "grad_norm": 0.5599475502967834, "learning_rate": 9.978330321773269e-06, "loss": 0.4933, "step": 903 }, { "epoch": 0.3807384528990594, "grad_norm": 0.5716677308082581, "learning_rate": 9.978101786713693e-06, "loss": 0.5088, "step": 904 }, { "epoch": 0.3811596237540362, "grad_norm": 0.57113116979599, "learning_rate": 9.977872055511493e-06, "loss": 0.5283, "step": 905 }, { "epoch": 0.38158079460901306, "grad_norm": 0.5398300886154175, "learning_rate": 9.977641128221872e-06, "loss": 0.531, "step": 906 }, { "epoch": 0.3820019654639899, "grad_norm": 0.6258471608161926, "learning_rate": 9.977409004900316e-06, "loss": 0.5527, "step": 907 }, { "epoch": 0.38242313631896674, "grad_norm": 0.646012544631958, "learning_rate": 9.977175685602601e-06, "loss": 0.5184, "step": 908 }, { "epoch": 0.3828443071739436, "grad_norm": 0.4632631540298462, "learning_rate": 9.976941170384789e-06, "loss": 0.4732, "step": 909 }, { "epoch": 0.3832654780289204, "grad_norm": 0.598080575466156, "learning_rate": 9.976705459303227e-06, "loss": 0.5157, "step": 910 }, { "epoch": 0.38368664888389725, "grad_norm": 0.5926657319068909, "learning_rate": 9.976468552414554e-06, "loss": 0.4834, "step": 911 }, { "epoch": 0.3841078197388741, "grad_norm": 0.49719956517219543, "learning_rate": 9.976230449775694e-06, "loss": 0.4895, "step": 912 }, { "epoch": 0.38452899059385093, "grad_norm": 0.5991385579109192, "learning_rate": 9.97599115144386e-06, "loss": 0.5108, "step": 913 }, { "epoch": 0.38495016144882777, "grad_norm": 0.5822198390960693, "learning_rate": 9.975750657476547e-06, "loss": 0.4676, "step": 914 }, { "epoch": 0.3853713323038046, "grad_norm": 0.5383700728416443, "learning_rate": 9.975508967931544e-06, "loss": 0.5488, "step": 915 }, { "epoch": 0.3857925031587814, "grad_norm": 0.5995841026306152, "learning_rate": 9.975266082866923e-06, "loss": 0.5031, "step": 916 }, { "epoch": 0.3862136740137582, "grad_norm": 0.5812575221061707, "learning_rate": 9.975022002341045e-06, "loss": 0.4968, "step": 917 }, { "epoch": 0.38663484486873506, "grad_norm": 0.5231122970581055, "learning_rate": 9.974776726412561e-06, "loss": 0.5375, "step": 918 }, { "epoch": 0.3870560157237119, "grad_norm": 0.6084420084953308, "learning_rate": 9.974530255140401e-06, "loss": 0.5175, "step": 919 }, { "epoch": 0.38747718657868874, "grad_norm": 0.48167508840560913, "learning_rate": 9.97428258858379e-06, "loss": 0.4853, "step": 920 }, { "epoch": 0.3878983574336656, "grad_norm": 0.5865576863288879, "learning_rate": 9.974033726802236e-06, "loss": 0.5011, "step": 921 }, { "epoch": 0.3883195282886424, "grad_norm": 0.5213209986686707, "learning_rate": 9.97378366985554e-06, "loss": 0.4818, "step": 922 }, { "epoch": 0.38874069914361925, "grad_norm": 0.48426440358161926, "learning_rate": 9.97353241780378e-06, "loss": 0.5141, "step": 923 }, { "epoch": 0.3891618699985961, "grad_norm": 0.5556553602218628, "learning_rate": 9.97327997070733e-06, "loss": 0.4984, "step": 924 }, { "epoch": 0.3895830408535729, "grad_norm": 0.542306661605835, "learning_rate": 9.973026328626849e-06, "loss": 0.5431, "step": 925 }, { "epoch": 0.39000421170854976, "grad_norm": 0.5212919116020203, "learning_rate": 9.97277149162328e-06, "loss": 0.537, "step": 926 }, { "epoch": 0.3904253825635266, "grad_norm": 0.493380606174469, "learning_rate": 9.972515459757858e-06, "loss": 0.4767, "step": 927 }, { "epoch": 0.39084655341850344, "grad_norm": 0.5515159964561462, "learning_rate": 9.972258233092101e-06, "loss": 0.5278, "step": 928 }, { "epoch": 0.3912677242734803, "grad_norm": 0.514544665813446, "learning_rate": 9.971999811687815e-06, "loss": 0.508, "step": 929 }, { "epoch": 0.3916888951284571, "grad_norm": 0.5323236584663391, "learning_rate": 9.971740195607096e-06, "loss": 0.5317, "step": 930 }, { "epoch": 0.39211006598343395, "grad_norm": 0.6211426258087158, "learning_rate": 9.971479384912324e-06, "loss": 0.554, "step": 931 }, { "epoch": 0.3925312368384108, "grad_norm": 0.5563642978668213, "learning_rate": 9.971217379666166e-06, "loss": 0.4932, "step": 932 }, { "epoch": 0.39295240769338763, "grad_norm": 0.5367445945739746, "learning_rate": 9.970954179931576e-06, "loss": 0.4919, "step": 933 }, { "epoch": 0.39337357854836447, "grad_norm": 0.5234500169754028, "learning_rate": 9.970689785771798e-06, "loss": 0.5071, "step": 934 }, { "epoch": 0.3937947494033413, "grad_norm": 0.5334751605987549, "learning_rate": 9.97042419725036e-06, "loss": 0.5105, "step": 935 }, { "epoch": 0.39421592025831814, "grad_norm": 0.5104185342788696, "learning_rate": 9.970157414431079e-06, "loss": 0.4941, "step": 936 }, { "epoch": 0.394637091113295, "grad_norm": 0.49806591868400574, "learning_rate": 9.969889437378055e-06, "loss": 0.4936, "step": 937 }, { "epoch": 0.3950582619682718, "grad_norm": 0.5477558970451355, "learning_rate": 9.96962026615568e-06, "loss": 0.5013, "step": 938 }, { "epoch": 0.39547943282324866, "grad_norm": 0.5231910347938538, "learning_rate": 9.969349900828628e-06, "loss": 0.489, "step": 939 }, { "epoch": 0.3959006036782255, "grad_norm": 0.48761364817619324, "learning_rate": 9.969078341461867e-06, "loss": 0.5, "step": 940 }, { "epoch": 0.3963217745332023, "grad_norm": 0.48567283153533936, "learning_rate": 9.968805588120644e-06, "loss": 0.4794, "step": 941 }, { "epoch": 0.3967429453881791, "grad_norm": 0.5358812212944031, "learning_rate": 9.968531640870497e-06, "loss": 0.5088, "step": 942 }, { "epoch": 0.39716411624315595, "grad_norm": 0.5252009630203247, "learning_rate": 9.968256499777252e-06, "loss": 0.5062, "step": 943 }, { "epoch": 0.3975852870981328, "grad_norm": 0.49185776710510254, "learning_rate": 9.967980164907019e-06, "loss": 0.4865, "step": 944 }, { "epoch": 0.3980064579531096, "grad_norm": 0.5530582666397095, "learning_rate": 9.967702636326195e-06, "loss": 0.4808, "step": 945 }, { "epoch": 0.39842762880808646, "grad_norm": 0.51576828956604, "learning_rate": 9.967423914101467e-06, "loss": 0.5338, "step": 946 }, { "epoch": 0.3988487996630633, "grad_norm": 0.47284460067749023, "learning_rate": 9.967143998299803e-06, "loss": 0.4803, "step": 947 }, { "epoch": 0.39926997051804014, "grad_norm": 0.5379521250724792, "learning_rate": 9.966862888988468e-06, "loss": 0.51, "step": 948 }, { "epoch": 0.399691141373017, "grad_norm": 0.5910153985023499, "learning_rate": 9.966580586235e-06, "loss": 0.5074, "step": 949 }, { "epoch": 0.4001123122279938, "grad_norm": 0.5052345991134644, "learning_rate": 9.966297090107236e-06, "loss": 0.4692, "step": 950 }, { "epoch": 0.40053348308297065, "grad_norm": 0.5540590882301331, "learning_rate": 9.966012400673291e-06, "loss": 0.4849, "step": 951 }, { "epoch": 0.4009546539379475, "grad_norm": 0.540916919708252, "learning_rate": 9.96572651800157e-06, "loss": 0.4663, "step": 952 }, { "epoch": 0.40137582479292433, "grad_norm": 0.528943657875061, "learning_rate": 9.965439442160771e-06, "loss": 0.5412, "step": 953 }, { "epoch": 0.40179699564790117, "grad_norm": 0.5570342540740967, "learning_rate": 9.965151173219867e-06, "loss": 0.4992, "step": 954 }, { "epoch": 0.402218166502878, "grad_norm": 0.5770092606544495, "learning_rate": 9.964861711248126e-06, "loss": 0.4954, "step": 955 }, { "epoch": 0.40263933735785484, "grad_norm": 0.5089607834815979, "learning_rate": 9.964571056315099e-06, "loss": 0.5142, "step": 956 }, { "epoch": 0.4030605082128317, "grad_norm": 0.5170055627822876, "learning_rate": 9.964279208490627e-06, "loss": 0.5034, "step": 957 }, { "epoch": 0.4034816790678085, "grad_norm": 0.5325459837913513, "learning_rate": 9.963986167844833e-06, "loss": 0.4758, "step": 958 }, { "epoch": 0.40390284992278536, "grad_norm": 0.5389122366905212, "learning_rate": 9.963691934448131e-06, "loss": 0.4994, "step": 959 }, { "epoch": 0.4043240207777622, "grad_norm": 0.5330473184585571, "learning_rate": 9.963396508371219e-06, "loss": 0.4764, "step": 960 }, { "epoch": 0.40474519163273903, "grad_norm": 0.5488138794898987, "learning_rate": 9.96309988968508e-06, "loss": 0.5015, "step": 961 }, { "epoch": 0.40516636248771587, "grad_norm": 0.5483156442642212, "learning_rate": 9.96280207846099e-06, "loss": 0.514, "step": 962 }, { "epoch": 0.4055875333426927, "grad_norm": 0.5995326638221741, "learning_rate": 9.962503074770502e-06, "loss": 0.5132, "step": 963 }, { "epoch": 0.40600870419766955, "grad_norm": 0.5480200052261353, "learning_rate": 9.962202878685467e-06, "loss": 0.5073, "step": 964 }, { "epoch": 0.4064298750526464, "grad_norm": 0.4754018783569336, "learning_rate": 9.961901490278013e-06, "loss": 0.4846, "step": 965 }, { "epoch": 0.40685104590762317, "grad_norm": 0.5804879665374756, "learning_rate": 9.961598909620556e-06, "loss": 0.4944, "step": 966 }, { "epoch": 0.4072722167626, "grad_norm": 0.5706849694252014, "learning_rate": 9.961295136785804e-06, "loss": 0.5472, "step": 967 }, { "epoch": 0.40769338761757684, "grad_norm": 0.5082347393035889, "learning_rate": 9.960990171846745e-06, "loss": 0.4604, "step": 968 }, { "epoch": 0.4081145584725537, "grad_norm": 0.5157772898674011, "learning_rate": 9.960684014876659e-06, "loss": 0.4916, "step": 969 }, { "epoch": 0.4085357293275305, "grad_norm": 0.5824040174484253, "learning_rate": 9.960376665949107e-06, "loss": 0.4885, "step": 970 }, { "epoch": 0.40895690018250735, "grad_norm": 0.48994070291519165, "learning_rate": 9.960068125137943e-06, "loss": 0.4804, "step": 971 }, { "epoch": 0.4093780710374842, "grad_norm": 0.5818823575973511, "learning_rate": 9.959758392517298e-06, "loss": 0.4959, "step": 972 }, { "epoch": 0.40979924189246103, "grad_norm": 0.5012041926383972, "learning_rate": 9.959447468161598e-06, "loss": 0.5179, "step": 973 }, { "epoch": 0.41022041274743787, "grad_norm": 0.5118056535720825, "learning_rate": 9.959135352145552e-06, "loss": 0.497, "step": 974 }, { "epoch": 0.4106415836024147, "grad_norm": 0.4999248683452606, "learning_rate": 9.958822044544157e-06, "loss": 0.4729, "step": 975 }, { "epoch": 0.41106275445739154, "grad_norm": 0.5352482199668884, "learning_rate": 9.958507545432693e-06, "loss": 0.5189, "step": 976 }, { "epoch": 0.4114839253123684, "grad_norm": 0.5408862233161926, "learning_rate": 9.958191854886726e-06, "loss": 0.5311, "step": 977 }, { "epoch": 0.4119050961673452, "grad_norm": 0.5066216588020325, "learning_rate": 9.957874972982114e-06, "loss": 0.5314, "step": 978 }, { "epoch": 0.41232626702232206, "grad_norm": 0.5014339685440063, "learning_rate": 9.957556899794994e-06, "loss": 0.5217, "step": 979 }, { "epoch": 0.4127474378772989, "grad_norm": 0.5537757277488708, "learning_rate": 9.957237635401797e-06, "loss": 0.5155, "step": 980 }, { "epoch": 0.41316860873227573, "grad_norm": 0.5059285163879395, "learning_rate": 9.956917179879235e-06, "loss": 0.4753, "step": 981 }, { "epoch": 0.41358977958725257, "grad_norm": 0.5351167321205139, "learning_rate": 9.956595533304305e-06, "loss": 0.5237, "step": 982 }, { "epoch": 0.4140109504422294, "grad_norm": 0.5098128318786621, "learning_rate": 9.956272695754294e-06, "loss": 0.5101, "step": 983 }, { "epoch": 0.41443212129720625, "grad_norm": 0.5226907730102539, "learning_rate": 9.955948667306776e-06, "loss": 0.5051, "step": 984 }, { "epoch": 0.4148532921521831, "grad_norm": 0.52610844373703, "learning_rate": 9.955623448039605e-06, "loss": 0.5224, "step": 985 }, { "epoch": 0.4152744630071599, "grad_norm": 0.5357764363288879, "learning_rate": 9.955297038030927e-06, "loss": 0.4897, "step": 986 }, { "epoch": 0.41569563386213676, "grad_norm": 0.4529535174369812, "learning_rate": 9.954969437359173e-06, "loss": 0.4958, "step": 987 }, { "epoch": 0.4161168047171136, "grad_norm": 0.5890129804611206, "learning_rate": 9.954640646103055e-06, "loss": 0.4992, "step": 988 }, { "epoch": 0.41653797557209044, "grad_norm": 0.5567834973335266, "learning_rate": 9.95431066434158e-06, "loss": 0.5087, "step": 989 }, { "epoch": 0.4169591464270673, "grad_norm": 0.5088604688644409, "learning_rate": 9.953979492154034e-06, "loss": 0.4875, "step": 990 }, { "epoch": 0.41738031728204406, "grad_norm": 0.5085922479629517, "learning_rate": 9.953647129619993e-06, "loss": 0.4757, "step": 991 }, { "epoch": 0.4178014881370209, "grad_norm": 0.5335556864738464, "learning_rate": 9.953313576819314e-06, "loss": 0.4916, "step": 992 }, { "epoch": 0.41822265899199773, "grad_norm": 0.49650493264198303, "learning_rate": 9.952978833832148e-06, "loss": 0.5124, "step": 993 }, { "epoch": 0.41864382984697457, "grad_norm": 0.46385326981544495, "learning_rate": 9.952642900738922e-06, "loss": 0.4671, "step": 994 }, { "epoch": 0.4190650007019514, "grad_norm": 0.4828450083732605, "learning_rate": 9.952305777620359e-06, "loss": 0.4726, "step": 995 }, { "epoch": 0.41948617155692824, "grad_norm": 0.5506529211997986, "learning_rate": 9.951967464557461e-06, "loss": 0.524, "step": 996 }, { "epoch": 0.4199073424119051, "grad_norm": 0.5327737331390381, "learning_rate": 9.951627961631518e-06, "loss": 0.5109, "step": 997 }, { "epoch": 0.4203285132668819, "grad_norm": 0.5055199861526489, "learning_rate": 9.951287268924108e-06, "loss": 0.4742, "step": 998 }, { "epoch": 0.42074968412185876, "grad_norm": 0.5192716121673584, "learning_rate": 9.95094538651709e-06, "loss": 0.4982, "step": 999 }, { "epoch": 0.4211708549768356, "grad_norm": 0.5064480304718018, "learning_rate": 9.950602314492615e-06, "loss": 0.4942, "step": 1000 }, { "epoch": 0.42159202583181243, "grad_norm": 0.504500150680542, "learning_rate": 9.950258052933112e-06, "loss": 0.5086, "step": 1001 }, { "epoch": 0.42201319668678927, "grad_norm": 0.5259708166122437, "learning_rate": 9.949912601921306e-06, "loss": 0.5017, "step": 1002 }, { "epoch": 0.4224343675417661, "grad_norm": 0.5199363827705383, "learning_rate": 9.9495659615402e-06, "loss": 0.5, "step": 1003 }, { "epoch": 0.42285553839674295, "grad_norm": 0.5089852809906006, "learning_rate": 9.949218131873083e-06, "loss": 0.4654, "step": 1004 }, { "epoch": 0.4232767092517198, "grad_norm": 0.5362235307693481, "learning_rate": 9.948869113003534e-06, "loss": 0.4869, "step": 1005 }, { "epoch": 0.4236978801066966, "grad_norm": 0.5509651899337769, "learning_rate": 9.948518905015416e-06, "loss": 0.5199, "step": 1006 }, { "epoch": 0.42411905096167346, "grad_norm": 0.5197945833206177, "learning_rate": 9.948167507992876e-06, "loss": 0.4878, "step": 1007 }, { "epoch": 0.4245402218166503, "grad_norm": 0.48915669322013855, "learning_rate": 9.947814922020349e-06, "loss": 0.4955, "step": 1008 }, { "epoch": 0.42496139267162714, "grad_norm": 0.5024054646492004, "learning_rate": 9.947461147182552e-06, "loss": 0.4933, "step": 1009 }, { "epoch": 0.425382563526604, "grad_norm": 0.5417972803115845, "learning_rate": 9.947106183564493e-06, "loss": 0.527, "step": 1010 }, { "epoch": 0.4258037343815808, "grad_norm": 0.5344251394271851, "learning_rate": 9.946750031251464e-06, "loss": 0.5138, "step": 1011 }, { "epoch": 0.42622490523655765, "grad_norm": 0.5275354385375977, "learning_rate": 9.946392690329038e-06, "loss": 0.4793, "step": 1012 }, { "epoch": 0.4266460760915345, "grad_norm": 0.5092624425888062, "learning_rate": 9.94603416088308e-06, "loss": 0.4832, "step": 1013 }, { "epoch": 0.4270672469465113, "grad_norm": 0.5218087434768677, "learning_rate": 9.945674442999738e-06, "loss": 0.5198, "step": 1014 }, { "epoch": 0.42748841780148816, "grad_norm": 0.4988138973712921, "learning_rate": 9.945313536765441e-06, "loss": 0.492, "step": 1015 }, { "epoch": 0.42790958865646495, "grad_norm": 0.48634809255599976, "learning_rate": 9.944951442266915e-06, "loss": 0.4885, "step": 1016 }, { "epoch": 0.4283307595114418, "grad_norm": 0.5142061114311218, "learning_rate": 9.944588159591158e-06, "loss": 0.4531, "step": 1017 }, { "epoch": 0.4287519303664186, "grad_norm": 0.5215470790863037, "learning_rate": 9.944223688825463e-06, "loss": 0.4978, "step": 1018 }, { "epoch": 0.42917310122139546, "grad_norm": 0.5146050453186035, "learning_rate": 9.943858030057404e-06, "loss": 0.4971, "step": 1019 }, { "epoch": 0.4295942720763723, "grad_norm": 0.5418946146965027, "learning_rate": 9.943491183374843e-06, "loss": 0.5012, "step": 1020 }, { "epoch": 0.43001544293134913, "grad_norm": 0.48179569840431213, "learning_rate": 9.943123148865926e-06, "loss": 0.4654, "step": 1021 }, { "epoch": 0.430436613786326, "grad_norm": 0.5002156496047974, "learning_rate": 9.942753926619084e-06, "loss": 0.5081, "step": 1022 }, { "epoch": 0.4308577846413028, "grad_norm": 0.5153267979621887, "learning_rate": 9.942383516723036e-06, "loss": 0.5019, "step": 1023 }, { "epoch": 0.43127895549627965, "grad_norm": 0.5779337882995605, "learning_rate": 9.942011919266781e-06, "loss": 0.5018, "step": 1024 }, { "epoch": 0.4317001263512565, "grad_norm": 0.5855889320373535, "learning_rate": 9.941639134339608e-06, "loss": 0.5504, "step": 1025 }, { "epoch": 0.4321212972062333, "grad_norm": 0.5405358076095581, "learning_rate": 9.941265162031093e-06, "loss": 0.5326, "step": 1026 }, { "epoch": 0.43254246806121016, "grad_norm": 0.5407902002334595, "learning_rate": 9.940890002431089e-06, "loss": 0.466, "step": 1027 }, { "epoch": 0.432963638916187, "grad_norm": 0.5325146317481995, "learning_rate": 9.940513655629744e-06, "loss": 0.5058, "step": 1028 }, { "epoch": 0.43338480977116384, "grad_norm": 0.535900354385376, "learning_rate": 9.940136121717488e-06, "loss": 0.4903, "step": 1029 }, { "epoch": 0.4338059806261407, "grad_norm": 0.488869845867157, "learning_rate": 9.93975740078503e-06, "loss": 0.4999, "step": 1030 }, { "epoch": 0.4342271514811175, "grad_norm": 0.5338951349258423, "learning_rate": 9.939377492923374e-06, "loss": 0.5039, "step": 1031 }, { "epoch": 0.43464832233609435, "grad_norm": 0.5249205827713013, "learning_rate": 9.938996398223802e-06, "loss": 0.4752, "step": 1032 }, { "epoch": 0.4350694931910712, "grad_norm": 0.49976250529289246, "learning_rate": 9.938614116777885e-06, "loss": 0.5058, "step": 1033 }, { "epoch": 0.435490664046048, "grad_norm": 0.5402967929840088, "learning_rate": 9.938230648677477e-06, "loss": 0.4863, "step": 1034 }, { "epoch": 0.43591183490102486, "grad_norm": 0.5705558657646179, "learning_rate": 9.937845994014719e-06, "loss": 0.4974, "step": 1035 }, { "epoch": 0.4363330057560017, "grad_norm": 0.49645936489105225, "learning_rate": 9.937460152882035e-06, "loss": 0.4763, "step": 1036 }, { "epoch": 0.43675417661097854, "grad_norm": 0.5033256411552429, "learning_rate": 9.937073125372136e-06, "loss": 0.5141, "step": 1037 }, { "epoch": 0.4371753474659554, "grad_norm": 0.6053181290626526, "learning_rate": 9.936684911578019e-06, "loss": 0.5273, "step": 1038 }, { "epoch": 0.4375965183209322, "grad_norm": 0.5322350263595581, "learning_rate": 9.93629551159296e-06, "loss": 0.5272, "step": 1039 }, { "epoch": 0.43801768917590905, "grad_norm": 0.4916490614414215, "learning_rate": 9.93590492551053e-06, "loss": 0.521, "step": 1040 }, { "epoch": 0.43843886003088584, "grad_norm": 0.5022852420806885, "learning_rate": 9.935513153424574e-06, "loss": 0.5022, "step": 1041 }, { "epoch": 0.4388600308858627, "grad_norm": 0.4731747508049011, "learning_rate": 9.93512019542923e-06, "loss": 0.494, "step": 1042 }, { "epoch": 0.4392812017408395, "grad_norm": 0.4519294798374176, "learning_rate": 9.934726051618919e-06, "loss": 0.4808, "step": 1043 }, { "epoch": 0.43970237259581635, "grad_norm": 0.4474514424800873, "learning_rate": 9.934330722088343e-06, "loss": 0.5053, "step": 1044 }, { "epoch": 0.4401235434507932, "grad_norm": 0.5138682723045349, "learning_rate": 9.933934206932497e-06, "loss": 0.5265, "step": 1045 }, { "epoch": 0.44054471430577, "grad_norm": 0.6026700735092163, "learning_rate": 9.933536506246653e-06, "loss": 0.5153, "step": 1046 }, { "epoch": 0.44096588516074686, "grad_norm": 0.4908948838710785, "learning_rate": 9.93313762012637e-06, "loss": 0.4955, "step": 1047 }, { "epoch": 0.4413870560157237, "grad_norm": 0.5137760639190674, "learning_rate": 9.932737548667494e-06, "loss": 0.5067, "step": 1048 }, { "epoch": 0.44180822687070054, "grad_norm": 0.5057886838912964, "learning_rate": 9.932336291966155e-06, "loss": 0.4909, "step": 1049 }, { "epoch": 0.4422293977256774, "grad_norm": 0.5121598839759827, "learning_rate": 9.931933850118767e-06, "loss": 0.5169, "step": 1050 }, { "epoch": 0.4426505685806542, "grad_norm": 0.4953257143497467, "learning_rate": 9.931530223222028e-06, "loss": 0.4704, "step": 1051 }, { "epoch": 0.44307173943563105, "grad_norm": 0.5212249755859375, "learning_rate": 9.931125411372922e-06, "loss": 0.5115, "step": 1052 }, { "epoch": 0.4434929102906079, "grad_norm": 0.4437900185585022, "learning_rate": 9.93071941466872e-06, "loss": 0.5013, "step": 1053 }, { "epoch": 0.4439140811455847, "grad_norm": 0.4716014266014099, "learning_rate": 9.930312233206971e-06, "loss": 0.5032, "step": 1054 }, { "epoch": 0.44433525200056156, "grad_norm": 0.4387332797050476, "learning_rate": 9.929903867085518e-06, "loss": 0.5328, "step": 1055 }, { "epoch": 0.4447564228555384, "grad_norm": 0.5548903942108154, "learning_rate": 9.92949431640248e-06, "loss": 0.5136, "step": 1056 }, { "epoch": 0.44517759371051524, "grad_norm": 0.4932219684123993, "learning_rate": 9.929083581256265e-06, "loss": 0.5066, "step": 1057 }, { "epoch": 0.4455987645654921, "grad_norm": 0.5067867636680603, "learning_rate": 9.928671661745563e-06, "loss": 0.5104, "step": 1058 }, { "epoch": 0.4460199354204689, "grad_norm": 0.6110357046127319, "learning_rate": 9.928258557969355e-06, "loss": 0.5244, "step": 1059 }, { "epoch": 0.44644110627544575, "grad_norm": 0.5166447758674622, "learning_rate": 9.927844270026899e-06, "loss": 0.4978, "step": 1060 }, { "epoch": 0.4468622771304226, "grad_norm": 0.47308728098869324, "learning_rate": 9.927428798017738e-06, "loss": 0.4976, "step": 1061 }, { "epoch": 0.44728344798539943, "grad_norm": 0.5376313328742981, "learning_rate": 9.927012142041707e-06, "loss": 0.5208, "step": 1062 }, { "epoch": 0.44770461884037627, "grad_norm": 0.5208466649055481, "learning_rate": 9.926594302198917e-06, "loss": 0.4793, "step": 1063 }, { "epoch": 0.4481257896953531, "grad_norm": 0.4863203167915344, "learning_rate": 9.92617527858977e-06, "loss": 0.4896, "step": 1064 }, { "epoch": 0.44854696055032994, "grad_norm": 0.4973244369029999, "learning_rate": 9.925755071314944e-06, "loss": 0.493, "step": 1065 }, { "epoch": 0.4489681314053067, "grad_norm": 0.5680011510848999, "learning_rate": 9.925333680475412e-06, "loss": 0.4594, "step": 1066 }, { "epoch": 0.44938930226028356, "grad_norm": 0.49685531854629517, "learning_rate": 9.924911106172426e-06, "loss": 0.5039, "step": 1067 }, { "epoch": 0.4498104731152604, "grad_norm": 0.4581175446510315, "learning_rate": 9.924487348507519e-06, "loss": 0.4825, "step": 1068 }, { "epoch": 0.45023164397023724, "grad_norm": 0.5729601979255676, "learning_rate": 9.924062407582513e-06, "loss": 0.5479, "step": 1069 }, { "epoch": 0.4506528148252141, "grad_norm": 0.46767398715019226, "learning_rate": 9.923636283499513e-06, "loss": 0.495, "step": 1070 }, { "epoch": 0.4510739856801909, "grad_norm": 0.5137498378753662, "learning_rate": 9.923208976360911e-06, "loss": 0.4838, "step": 1071 }, { "epoch": 0.45149515653516775, "grad_norm": 0.471998006105423, "learning_rate": 9.922780486269382e-06, "loss": 0.5107, "step": 1072 }, { "epoch": 0.4519163273901446, "grad_norm": 0.5310439467430115, "learning_rate": 9.922350813327877e-06, "loss": 0.5577, "step": 1073 }, { "epoch": 0.4523374982451214, "grad_norm": 0.5362529754638672, "learning_rate": 9.921919957639644e-06, "loss": 0.4732, "step": 1074 }, { "epoch": 0.45275866910009827, "grad_norm": 0.48407143354415894, "learning_rate": 9.921487919308206e-06, "loss": 0.4997, "step": 1075 }, { "epoch": 0.4531798399550751, "grad_norm": 0.4795980751514435, "learning_rate": 9.921054698437376e-06, "loss": 0.4423, "step": 1076 }, { "epoch": 0.45360101081005194, "grad_norm": 0.5344313979148865, "learning_rate": 9.920620295131247e-06, "loss": 0.5365, "step": 1077 }, { "epoch": 0.4540221816650288, "grad_norm": 0.5947988629341125, "learning_rate": 9.920184709494199e-06, "loss": 0.5345, "step": 1078 }, { "epoch": 0.4544433525200056, "grad_norm": 0.488734632730484, "learning_rate": 9.919747941630896e-06, "loss": 0.4914, "step": 1079 }, { "epoch": 0.45486452337498245, "grad_norm": 0.552248477935791, "learning_rate": 9.919309991646282e-06, "loss": 0.5138, "step": 1080 }, { "epoch": 0.4552856942299593, "grad_norm": 0.5501003861427307, "learning_rate": 9.91887085964559e-06, "loss": 0.5045, "step": 1081 }, { "epoch": 0.45570686508493613, "grad_norm": 0.5167542695999146, "learning_rate": 9.918430545734334e-06, "loss": 0.4849, "step": 1082 }, { "epoch": 0.45612803593991297, "grad_norm": 0.4808852970600128, "learning_rate": 9.917989050018312e-06, "loss": 0.4737, "step": 1083 }, { "epoch": 0.4565492067948898, "grad_norm": 0.5009639263153076, "learning_rate": 9.917546372603612e-06, "loss": 0.4959, "step": 1084 }, { "epoch": 0.45697037764986664, "grad_norm": 0.5163744688034058, "learning_rate": 9.917102513596595e-06, "loss": 0.4988, "step": 1085 }, { "epoch": 0.4573915485048435, "grad_norm": 0.5365296602249146, "learning_rate": 9.916657473103915e-06, "loss": 0.4518, "step": 1086 }, { "epoch": 0.4578127193598203, "grad_norm": 0.5139666795730591, "learning_rate": 9.916211251232507e-06, "loss": 0.5411, "step": 1087 }, { "epoch": 0.45823389021479716, "grad_norm": 0.6230540871620178, "learning_rate": 9.915763848089587e-06, "loss": 0.5235, "step": 1088 }, { "epoch": 0.458655061069774, "grad_norm": 0.5414952039718628, "learning_rate": 9.915315263782659e-06, "loss": 0.504, "step": 1089 }, { "epoch": 0.45907623192475083, "grad_norm": 0.5023545622825623, "learning_rate": 9.91486549841951e-06, "loss": 0.5031, "step": 1090 }, { "epoch": 0.45949740277972767, "grad_norm": 0.5198925733566284, "learning_rate": 9.914414552108209e-06, "loss": 0.5115, "step": 1091 }, { "epoch": 0.45991857363470445, "grad_norm": 0.5518724322319031, "learning_rate": 9.913962424957109e-06, "loss": 0.5001, "step": 1092 }, { "epoch": 0.4603397444896813, "grad_norm": 0.45698386430740356, "learning_rate": 9.913509117074848e-06, "loss": 0.495, "step": 1093 }, { "epoch": 0.46076091534465813, "grad_norm": 0.5857415199279785, "learning_rate": 9.91305462857035e-06, "loss": 0.5081, "step": 1094 }, { "epoch": 0.46118208619963497, "grad_norm": 0.5429897308349609, "learning_rate": 9.912598959552816e-06, "loss": 0.5302, "step": 1095 }, { "epoch": 0.4616032570546118, "grad_norm": 0.5284080505371094, "learning_rate": 9.912142110131737e-06, "loss": 0.4823, "step": 1096 }, { "epoch": 0.46202442790958864, "grad_norm": 0.5758516788482666, "learning_rate": 9.911684080416883e-06, "loss": 0.5122, "step": 1097 }, { "epoch": 0.4624455987645655, "grad_norm": 0.5643895268440247, "learning_rate": 9.911224870518313e-06, "loss": 0.5155, "step": 1098 }, { "epoch": 0.4628667696195423, "grad_norm": 0.5843594074249268, "learning_rate": 9.910764480546362e-06, "loss": 0.4677, "step": 1099 }, { "epoch": 0.46328794047451916, "grad_norm": 0.5711727142333984, "learning_rate": 9.910302910611656e-06, "loss": 0.5091, "step": 1100 }, { "epoch": 0.463709111329496, "grad_norm": 0.591746985912323, "learning_rate": 9.909840160825101e-06, "loss": 0.5254, "step": 1101 }, { "epoch": 0.46413028218447283, "grad_norm": 0.5348014235496521, "learning_rate": 9.909376231297887e-06, "loss": 0.4729, "step": 1102 }, { "epoch": 0.46455145303944967, "grad_norm": 0.48721930384635925, "learning_rate": 9.908911122141486e-06, "loss": 0.4717, "step": 1103 }, { "epoch": 0.4649726238944265, "grad_norm": 0.48535090684890747, "learning_rate": 9.908444833467659e-06, "loss": 0.4799, "step": 1104 }, { "epoch": 0.46539379474940334, "grad_norm": 0.6000082492828369, "learning_rate": 9.907977365388439e-06, "loss": 0.5424, "step": 1105 }, { "epoch": 0.4658149656043802, "grad_norm": 0.5979001522064209, "learning_rate": 9.907508718016156e-06, "loss": 0.4862, "step": 1106 }, { "epoch": 0.466236136459357, "grad_norm": 0.4631212055683136, "learning_rate": 9.907038891463416e-06, "loss": 0.5056, "step": 1107 }, { "epoch": 0.46665730731433386, "grad_norm": 0.566827654838562, "learning_rate": 9.906567885843107e-06, "loss": 0.5086, "step": 1108 }, { "epoch": 0.4670784781693107, "grad_norm": 0.6046436429023743, "learning_rate": 9.9060957012684e-06, "loss": 0.4936, "step": 1109 }, { "epoch": 0.46749964902428753, "grad_norm": 0.48307710886001587, "learning_rate": 9.905622337852761e-06, "loss": 0.5217, "step": 1110 }, { "epoch": 0.46792081987926437, "grad_norm": 0.463703989982605, "learning_rate": 9.905147795709921e-06, "loss": 0.4893, "step": 1111 }, { "epoch": 0.4683419907342412, "grad_norm": 0.5447791814804077, "learning_rate": 9.90467207495391e-06, "loss": 0.5085, "step": 1112 }, { "epoch": 0.46876316158921805, "grad_norm": 0.5020771622657776, "learning_rate": 9.904195175699032e-06, "loss": 0.4706, "step": 1113 }, { "epoch": 0.4691843324441949, "grad_norm": 0.5448697805404663, "learning_rate": 9.903717098059876e-06, "loss": 0.514, "step": 1114 }, { "epoch": 0.4696055032991717, "grad_norm": 0.5598502159118652, "learning_rate": 9.903237842151313e-06, "loss": 0.4567, "step": 1115 }, { "epoch": 0.47002667415414856, "grad_norm": 0.5071452856063843, "learning_rate": 9.902757408088504e-06, "loss": 0.5612, "step": 1116 }, { "epoch": 0.47044784500912534, "grad_norm": 0.495337575674057, "learning_rate": 9.902275795986884e-06, "loss": 0.4978, "step": 1117 }, { "epoch": 0.4708690158641022, "grad_norm": 0.5905269384384155, "learning_rate": 9.90179300596218e-06, "loss": 0.5258, "step": 1118 }, { "epoch": 0.471290186719079, "grad_norm": 0.5397282242774963, "learning_rate": 9.901309038130392e-06, "loss": 0.5222, "step": 1119 }, { "epoch": 0.47171135757405586, "grad_norm": 0.5762771964073181, "learning_rate": 9.900823892607809e-06, "loss": 0.5076, "step": 1120 }, { "epoch": 0.4721325284290327, "grad_norm": 0.5443638563156128, "learning_rate": 9.900337569511003e-06, "loss": 0.5047, "step": 1121 }, { "epoch": 0.47255369928400953, "grad_norm": 0.5701370239257812, "learning_rate": 9.899850068956832e-06, "loss": 0.5134, "step": 1122 }, { "epoch": 0.47297487013898637, "grad_norm": 0.4607829749584198, "learning_rate": 9.899361391062427e-06, "loss": 0.4581, "step": 1123 }, { "epoch": 0.4733960409939632, "grad_norm": 0.5127384662628174, "learning_rate": 9.898871535945212e-06, "loss": 0.4853, "step": 1124 }, { "epoch": 0.47381721184894005, "grad_norm": 0.49543750286102295, "learning_rate": 9.898380503722886e-06, "loss": 0.4925, "step": 1125 }, { "epoch": 0.4742383827039169, "grad_norm": 0.4755815267562866, "learning_rate": 9.89788829451344e-06, "loss": 0.4448, "step": 1126 }, { "epoch": 0.4746595535588937, "grad_norm": 0.5375957489013672, "learning_rate": 9.897394908435138e-06, "loss": 0.4648, "step": 1127 }, { "epoch": 0.47508072441387056, "grad_norm": 0.5004990696907043, "learning_rate": 9.896900345606532e-06, "loss": 0.5234, "step": 1128 }, { "epoch": 0.4755018952688474, "grad_norm": 0.5288639068603516, "learning_rate": 9.896404606146457e-06, "loss": 0.5132, "step": 1129 }, { "epoch": 0.47592306612382423, "grad_norm": 0.5601378083229065, "learning_rate": 9.89590769017403e-06, "loss": 0.4765, "step": 1130 }, { "epoch": 0.47634423697880107, "grad_norm": 0.5199294686317444, "learning_rate": 9.895409597808649e-06, "loss": 0.5486, "step": 1131 }, { "epoch": 0.4767654078337779, "grad_norm": 0.49002334475517273, "learning_rate": 9.894910329169998e-06, "loss": 0.4754, "step": 1132 }, { "epoch": 0.47718657868875475, "grad_norm": 0.5781057476997375, "learning_rate": 9.89440988437804e-06, "loss": 0.4825, "step": 1133 }, { "epoch": 0.4776077495437316, "grad_norm": 0.475390762090683, "learning_rate": 9.893908263553023e-06, "loss": 0.4729, "step": 1134 }, { "epoch": 0.4780289203987084, "grad_norm": 0.4970915615558624, "learning_rate": 9.893405466815478e-06, "loss": 0.5026, "step": 1135 }, { "epoch": 0.47845009125368526, "grad_norm": 0.5449345707893372, "learning_rate": 9.892901494286214e-06, "loss": 0.4943, "step": 1136 }, { "epoch": 0.4788712621086621, "grad_norm": 0.5660333633422852, "learning_rate": 9.89239634608633e-06, "loss": 0.4837, "step": 1137 }, { "epoch": 0.47929243296363894, "grad_norm": 0.5658596754074097, "learning_rate": 9.891890022337201e-06, "loss": 0.5659, "step": 1138 }, { "epoch": 0.4797136038186158, "grad_norm": 0.544928789138794, "learning_rate": 9.891382523160487e-06, "loss": 0.4939, "step": 1139 }, { "epoch": 0.4801347746735926, "grad_norm": 0.5420757532119751, "learning_rate": 9.89087384867813e-06, "loss": 0.5349, "step": 1140 }, { "epoch": 0.48055594552856945, "grad_norm": 0.5033531188964844, "learning_rate": 9.890363999012357e-06, "loss": 0.4947, "step": 1141 }, { "epoch": 0.48097711638354623, "grad_norm": 0.5049625039100647, "learning_rate": 9.889852974285674e-06, "loss": 0.5235, "step": 1142 }, { "epoch": 0.48139828723852307, "grad_norm": 0.547959566116333, "learning_rate": 9.88934077462087e-06, "loss": 0.4838, "step": 1143 }, { "epoch": 0.4818194580934999, "grad_norm": 0.469156414270401, "learning_rate": 9.888827400141017e-06, "loss": 0.4742, "step": 1144 }, { "epoch": 0.48224062894847675, "grad_norm": 0.5777403712272644, "learning_rate": 9.88831285096947e-06, "loss": 0.5181, "step": 1145 }, { "epoch": 0.4826617998034536, "grad_norm": 0.530022919178009, "learning_rate": 9.887797127229862e-06, "loss": 0.4364, "step": 1146 }, { "epoch": 0.4830829706584304, "grad_norm": 0.47103917598724365, "learning_rate": 9.887280229046116e-06, "loss": 0.436, "step": 1147 }, { "epoch": 0.48350414151340726, "grad_norm": 0.5086380839347839, "learning_rate": 9.886762156542428e-06, "loss": 0.4507, "step": 1148 }, { "epoch": 0.4839253123683841, "grad_norm": 0.5070964694023132, "learning_rate": 9.886242909843287e-06, "loss": 0.4701, "step": 1149 }, { "epoch": 0.48434648322336094, "grad_norm": 0.5451953411102295, "learning_rate": 9.885722489073452e-06, "loss": 0.4777, "step": 1150 }, { "epoch": 0.4847676540783378, "grad_norm": 0.5165458917617798, "learning_rate": 9.885200894357974e-06, "loss": 0.4962, "step": 1151 }, { "epoch": 0.4851888249333146, "grad_norm": 0.5049980878829956, "learning_rate": 9.88467812582218e-06, "loss": 0.4935, "step": 1152 }, { "epoch": 0.48560999578829145, "grad_norm": 0.5114811658859253, "learning_rate": 9.884154183591684e-06, "loss": 0.4708, "step": 1153 }, { "epoch": 0.4860311666432683, "grad_norm": 0.5161516666412354, "learning_rate": 9.883629067792378e-06, "loss": 0.5028, "step": 1154 }, { "epoch": 0.4864523374982451, "grad_norm": 0.5177231431007385, "learning_rate": 9.883102778550434e-06, "loss": 0.5042, "step": 1155 }, { "epoch": 0.48687350835322196, "grad_norm": 0.5556531548500061, "learning_rate": 9.882575315992313e-06, "loss": 0.5632, "step": 1156 }, { "epoch": 0.4872946792081988, "grad_norm": 0.5797349810600281, "learning_rate": 9.882046680244754e-06, "loss": 0.4788, "step": 1157 }, { "epoch": 0.48771585006317564, "grad_norm": 0.5161490440368652, "learning_rate": 9.881516871434778e-06, "loss": 0.517, "step": 1158 }, { "epoch": 0.4881370209181525, "grad_norm": 0.5168392658233643, "learning_rate": 9.880985889689687e-06, "loss": 0.5504, "step": 1159 }, { "epoch": 0.4885581917731293, "grad_norm": 0.5177571177482605, "learning_rate": 9.880453735137064e-06, "loss": 0.5216, "step": 1160 }, { "epoch": 0.48897936262810615, "grad_norm": 0.564765453338623, "learning_rate": 9.879920407904779e-06, "loss": 0.5061, "step": 1161 }, { "epoch": 0.489400533483083, "grad_norm": 0.49636322259902954, "learning_rate": 9.87938590812098e-06, "loss": 0.5066, "step": 1162 }, { "epoch": 0.4898217043380598, "grad_norm": 0.4723376929759979, "learning_rate": 9.878850235914095e-06, "loss": 0.4625, "step": 1163 }, { "epoch": 0.49024287519303666, "grad_norm": 0.48712047934532166, "learning_rate": 9.878313391412836e-06, "loss": 0.4423, "step": 1164 }, { "epoch": 0.4906640460480135, "grad_norm": 0.5459235906600952, "learning_rate": 9.877775374746197e-06, "loss": 0.5241, "step": 1165 }, { "epoch": 0.49108521690299034, "grad_norm": 0.47378572821617126, "learning_rate": 9.877236186043454e-06, "loss": 0.5114, "step": 1166 }, { "epoch": 0.4915063877579671, "grad_norm": 0.512317419052124, "learning_rate": 9.876695825434164e-06, "loss": 0.4841, "step": 1167 }, { "epoch": 0.49192755861294396, "grad_norm": 0.5783916115760803, "learning_rate": 9.876154293048163e-06, "loss": 0.5188, "step": 1168 }, { "epoch": 0.4923487294679208, "grad_norm": 0.49082279205322266, "learning_rate": 9.875611589015572e-06, "loss": 0.5176, "step": 1169 }, { "epoch": 0.49276990032289764, "grad_norm": 0.5493561625480652, "learning_rate": 9.875067713466795e-06, "loss": 0.5004, "step": 1170 }, { "epoch": 0.4931910711778745, "grad_norm": 0.576141893863678, "learning_rate": 9.87452266653251e-06, "loss": 0.5278, "step": 1171 }, { "epoch": 0.4936122420328513, "grad_norm": 0.5158469080924988, "learning_rate": 9.873976448343685e-06, "loss": 0.5071, "step": 1172 }, { "epoch": 0.49403341288782815, "grad_norm": 0.48074832558631897, "learning_rate": 9.873429059031565e-06, "loss": 0.4743, "step": 1173 }, { "epoch": 0.494454583742805, "grad_norm": 0.5196962356567383, "learning_rate": 9.872880498727675e-06, "loss": 0.544, "step": 1174 }, { "epoch": 0.4948757545977818, "grad_norm": 0.5035938024520874, "learning_rate": 9.872330767563827e-06, "loss": 0.4675, "step": 1175 }, { "epoch": 0.49529692545275866, "grad_norm": 0.5512756705284119, "learning_rate": 9.871779865672109e-06, "loss": 0.4695, "step": 1176 }, { "epoch": 0.4957180963077355, "grad_norm": 0.4785759449005127, "learning_rate": 9.871227793184893e-06, "loss": 0.5101, "step": 1177 }, { "epoch": 0.49613926716271234, "grad_norm": 0.4648728370666504, "learning_rate": 9.87067455023483e-06, "loss": 0.4384, "step": 1178 }, { "epoch": 0.4965604380176892, "grad_norm": 0.5287153124809265, "learning_rate": 9.870120136954853e-06, "loss": 0.4995, "step": 1179 }, { "epoch": 0.496981608872666, "grad_norm": 0.5080562233924866, "learning_rate": 9.869564553478181e-06, "loss": 0.4725, "step": 1180 }, { "epoch": 0.49740277972764285, "grad_norm": 0.5211158990859985, "learning_rate": 9.869007799938306e-06, "loss": 0.4855, "step": 1181 }, { "epoch": 0.4978239505826197, "grad_norm": 0.5229096412658691, "learning_rate": 9.868449876469008e-06, "loss": 0.5141, "step": 1182 }, { "epoch": 0.4982451214375965, "grad_norm": 0.4980382025241852, "learning_rate": 9.867890783204344e-06, "loss": 0.4802, "step": 1183 }, { "epoch": 0.49866629229257337, "grad_norm": 0.49995580315589905, "learning_rate": 9.867330520278653e-06, "loss": 0.4917, "step": 1184 }, { "epoch": 0.4990874631475502, "grad_norm": 0.5040615200996399, "learning_rate": 9.866769087826556e-06, "loss": 0.4723, "step": 1185 }, { "epoch": 0.49950863400252704, "grad_norm": 0.49557384848594666, "learning_rate": 9.866206485982955e-06, "loss": 0.4722, "step": 1186 }, { "epoch": 0.4999298048575039, "grad_norm": 0.5508081316947937, "learning_rate": 9.865642714883033e-06, "loss": 0.5287, "step": 1187 }, { "epoch": 0.5003509757124807, "grad_norm": 0.5339958071708679, "learning_rate": 9.865077774662251e-06, "loss": 0.4734, "step": 1188 }, { "epoch": 0.5007721465674575, "grad_norm": 0.5454137325286865, "learning_rate": 9.864511665456355e-06, "loss": 0.4993, "step": 1189 }, { "epoch": 0.5011933174224343, "grad_norm": 0.5141319036483765, "learning_rate": 9.86394438740137e-06, "loss": 0.4905, "step": 1190 }, { "epoch": 0.5016144882774112, "grad_norm": 0.6771369576454163, "learning_rate": 9.8633759406336e-06, "loss": 0.5078, "step": 1191 }, { "epoch": 0.502035659132388, "grad_norm": 0.6138107776641846, "learning_rate": 9.862806325289635e-06, "loss": 0.5236, "step": 1192 }, { "epoch": 0.5024568299873649, "grad_norm": 0.5222311019897461, "learning_rate": 9.862235541506345e-06, "loss": 0.4785, "step": 1193 }, { "epoch": 0.5028780008423417, "grad_norm": 0.5901702642440796, "learning_rate": 9.861663589420871e-06, "loss": 0.4775, "step": 1194 }, { "epoch": 0.5032991716973185, "grad_norm": 0.6664824485778809, "learning_rate": 9.861090469170647e-06, "loss": 0.5076, "step": 1195 }, { "epoch": 0.5037203425522954, "grad_norm": 0.5478004217147827, "learning_rate": 9.860516180893383e-06, "loss": 0.5008, "step": 1196 }, { "epoch": 0.5041415134072722, "grad_norm": 0.5539632439613342, "learning_rate": 9.859940724727066e-06, "loss": 0.4876, "step": 1197 }, { "epoch": 0.504562684262249, "grad_norm": 0.5511875748634338, "learning_rate": 9.85936410080997e-06, "loss": 0.5171, "step": 1198 }, { "epoch": 0.5049838551172259, "grad_norm": 0.5268946290016174, "learning_rate": 9.858786309280648e-06, "loss": 0.5032, "step": 1199 }, { "epoch": 0.5054050259722027, "grad_norm": 0.5690274834632874, "learning_rate": 9.858207350277932e-06, "loss": 0.5059, "step": 1200 }, { "epoch": 0.5058261968271796, "grad_norm": 0.4925753176212311, "learning_rate": 9.85762722394093e-06, "loss": 0.5099, "step": 1201 }, { "epoch": 0.5062473676821564, "grad_norm": 0.6116470098495483, "learning_rate": 9.857045930409038e-06, "loss": 0.5045, "step": 1202 }, { "epoch": 0.5066685385371332, "grad_norm": 0.45826366543769836, "learning_rate": 9.856463469821932e-06, "loss": 0.4542, "step": 1203 }, { "epoch": 0.5070897093921101, "grad_norm": 0.5489162802696228, "learning_rate": 9.855879842319564e-06, "loss": 0.4819, "step": 1204 }, { "epoch": 0.5075108802470869, "grad_norm": 0.4742046296596527, "learning_rate": 9.855295048042169e-06, "loss": 0.4554, "step": 1205 }, { "epoch": 0.5079320511020637, "grad_norm": 0.4953957498073578, "learning_rate": 9.854709087130261e-06, "loss": 0.4724, "step": 1206 }, { "epoch": 0.5083532219570406, "grad_norm": 0.4774366319179535, "learning_rate": 9.854121959724637e-06, "loss": 0.4945, "step": 1207 }, { "epoch": 0.5087743928120174, "grad_norm": 0.5173519849777222, "learning_rate": 9.85353366596637e-06, "loss": 0.4349, "step": 1208 }, { "epoch": 0.5091955636669943, "grad_norm": 0.557041347026825, "learning_rate": 9.85294420599682e-06, "loss": 0.51, "step": 1209 }, { "epoch": 0.5096167345219711, "grad_norm": 0.46800974011421204, "learning_rate": 9.852353579957617e-06, "loss": 0.4737, "step": 1210 }, { "epoch": 0.5100379053769479, "grad_norm": 0.5672736167907715, "learning_rate": 9.851761787990681e-06, "loss": 0.5225, "step": 1211 }, { "epoch": 0.5104590762319248, "grad_norm": 0.48551705479621887, "learning_rate": 9.851168830238207e-06, "loss": 0.5084, "step": 1212 }, { "epoch": 0.5108802470869016, "grad_norm": 0.5674362778663635, "learning_rate": 9.850574706842673e-06, "loss": 0.4786, "step": 1213 }, { "epoch": 0.5113014179418784, "grad_norm": 0.508976936340332, "learning_rate": 9.849979417946835e-06, "loss": 0.5005, "step": 1214 }, { "epoch": 0.5117225887968553, "grad_norm": 0.49410372972488403, "learning_rate": 9.849382963693728e-06, "loss": 0.4897, "step": 1215 }, { "epoch": 0.5121437596518321, "grad_norm": 0.6269311904907227, "learning_rate": 9.84878534422667e-06, "loss": 0.5091, "step": 1216 }, { "epoch": 0.512564930506809, "grad_norm": 0.6247490644454956, "learning_rate": 9.848186559689256e-06, "loss": 0.4914, "step": 1217 }, { "epoch": 0.5129861013617858, "grad_norm": 0.5446016788482666, "learning_rate": 9.847586610225367e-06, "loss": 0.4947, "step": 1218 }, { "epoch": 0.5134072722167626, "grad_norm": 0.5769742131233215, "learning_rate": 9.846985495979151e-06, "loss": 0.4659, "step": 1219 }, { "epoch": 0.5138284430717395, "grad_norm": 0.6180410981178284, "learning_rate": 9.846383217095052e-06, "loss": 0.4995, "step": 1220 }, { "epoch": 0.5142496139267163, "grad_norm": 0.6066032648086548, "learning_rate": 9.845779773717783e-06, "loss": 0.5069, "step": 1221 }, { "epoch": 0.5146707847816931, "grad_norm": 0.4636895954608917, "learning_rate": 9.845175165992339e-06, "loss": 0.4416, "step": 1222 }, { "epoch": 0.51509195563667, "grad_norm": 0.5457524061203003, "learning_rate": 9.844569394063997e-06, "loss": 0.4683, "step": 1223 }, { "epoch": 0.5155131264916468, "grad_norm": 0.7184615731239319, "learning_rate": 9.843962458078313e-06, "loss": 0.5346, "step": 1224 }, { "epoch": 0.5159342973466237, "grad_norm": 0.5468956232070923, "learning_rate": 9.843354358181121e-06, "loss": 0.4675, "step": 1225 }, { "epoch": 0.5163554682016005, "grad_norm": 0.4948354959487915, "learning_rate": 9.842745094518536e-06, "loss": 0.5102, "step": 1226 }, { "epoch": 0.5167766390565773, "grad_norm": 0.5822984576225281, "learning_rate": 9.842134667236953e-06, "loss": 0.4773, "step": 1227 }, { "epoch": 0.5171978099115542, "grad_norm": 0.6482999324798584, "learning_rate": 9.841523076483045e-06, "loss": 0.5133, "step": 1228 }, { "epoch": 0.5176189807665309, "grad_norm": 0.5431896448135376, "learning_rate": 9.840910322403765e-06, "loss": 0.4812, "step": 1229 }, { "epoch": 0.5180401516215077, "grad_norm": 0.49757280945777893, "learning_rate": 9.840296405146349e-06, "loss": 0.4612, "step": 1230 }, { "epoch": 0.5184613224764846, "grad_norm": 0.5785533785820007, "learning_rate": 9.839681324858307e-06, "loss": 0.4723, "step": 1231 }, { "epoch": 0.5188824933314614, "grad_norm": 0.5283496975898743, "learning_rate": 9.839065081687432e-06, "loss": 0.4833, "step": 1232 }, { "epoch": 0.5193036641864383, "grad_norm": 0.48373085260391235, "learning_rate": 9.838447675781795e-06, "loss": 0.5248, "step": 1233 }, { "epoch": 0.5197248350414151, "grad_norm": 0.46941354870796204, "learning_rate": 9.837829107289745e-06, "loss": 0.478, "step": 1234 }, { "epoch": 0.5201460058963919, "grad_norm": 0.6068835854530334, "learning_rate": 9.837209376359918e-06, "loss": 0.5072, "step": 1235 }, { "epoch": 0.5205671767513688, "grad_norm": 0.5597879886627197, "learning_rate": 9.836588483141215e-06, "loss": 0.4937, "step": 1236 }, { "epoch": 0.5209883476063456, "grad_norm": 0.5404855608940125, "learning_rate": 9.835966427782832e-06, "loss": 0.5045, "step": 1237 }, { "epoch": 0.5214095184613224, "grad_norm": 0.4764634966850281, "learning_rate": 9.835343210434236e-06, "loss": 0.4416, "step": 1238 }, { "epoch": 0.5218306893162993, "grad_norm": 0.5220231413841248, "learning_rate": 9.834718831245171e-06, "loss": 0.4452, "step": 1239 }, { "epoch": 0.5222518601712761, "grad_norm": 0.5306071639060974, "learning_rate": 9.834093290365665e-06, "loss": 0.4806, "step": 1240 }, { "epoch": 0.522673031026253, "grad_norm": 0.5202628970146179, "learning_rate": 9.833466587946023e-06, "loss": 0.4661, "step": 1241 }, { "epoch": 0.5230942018812298, "grad_norm": 0.47667911648750305, "learning_rate": 9.832838724136831e-06, "loss": 0.4796, "step": 1242 }, { "epoch": 0.5235153727362066, "grad_norm": 0.49792295694351196, "learning_rate": 9.83220969908895e-06, "loss": 0.4849, "step": 1243 }, { "epoch": 0.5239365435911835, "grad_norm": 0.5231519341468811, "learning_rate": 9.831579512953526e-06, "loss": 0.4812, "step": 1244 }, { "epoch": 0.5243577144461603, "grad_norm": 0.4560157358646393, "learning_rate": 9.83094816588198e-06, "loss": 0.4938, "step": 1245 }, { "epoch": 0.5247788853011371, "grad_norm": 0.48664382100105286, "learning_rate": 9.830315658026011e-06, "loss": 0.506, "step": 1246 }, { "epoch": 0.525200056156114, "grad_norm": 0.49973493814468384, "learning_rate": 9.829681989537599e-06, "loss": 0.5092, "step": 1247 }, { "epoch": 0.5256212270110908, "grad_norm": 0.554927408695221, "learning_rate": 9.829047160569e-06, "loss": 0.526, "step": 1248 }, { "epoch": 0.5260423978660677, "grad_norm": 0.48209571838378906, "learning_rate": 9.828411171272758e-06, "loss": 0.477, "step": 1249 }, { "epoch": 0.5264635687210445, "grad_norm": 0.5263972282409668, "learning_rate": 9.827774021801684e-06, "loss": 0.5144, "step": 1250 }, { "epoch": 0.5268847395760213, "grad_norm": 0.49260827898979187, "learning_rate": 9.827135712308873e-06, "loss": 0.4502, "step": 1251 }, { "epoch": 0.5273059104309982, "grad_norm": 0.4864407181739807, "learning_rate": 9.8264962429477e-06, "loss": 0.5142, "step": 1252 }, { "epoch": 0.527727081285975, "grad_norm": 0.43593186140060425, "learning_rate": 9.825855613871816e-06, "loss": 0.473, "step": 1253 }, { "epoch": 0.5281482521409518, "grad_norm": 0.4962002635002136, "learning_rate": 9.825213825235153e-06, "loss": 0.5288, "step": 1254 }, { "epoch": 0.5285694229959287, "grad_norm": 0.507275402545929, "learning_rate": 9.82457087719192e-06, "loss": 0.4881, "step": 1255 }, { "epoch": 0.5289905938509055, "grad_norm": 0.5759797096252441, "learning_rate": 9.823926769896606e-06, "loss": 0.5322, "step": 1256 }, { "epoch": 0.5294117647058824, "grad_norm": 0.5425185561180115, "learning_rate": 9.823281503503976e-06, "loss": 0.5051, "step": 1257 }, { "epoch": 0.5298329355608592, "grad_norm": 0.4654744565486908, "learning_rate": 9.822635078169076e-06, "loss": 0.4822, "step": 1258 }, { "epoch": 0.530254106415836, "grad_norm": 0.5575202703475952, "learning_rate": 9.82198749404723e-06, "loss": 0.4906, "step": 1259 }, { "epoch": 0.5306752772708129, "grad_norm": 0.510966956615448, "learning_rate": 9.821338751294039e-06, "loss": 0.4738, "step": 1260 }, { "epoch": 0.5310964481257897, "grad_norm": 0.5197913646697998, "learning_rate": 9.820688850065386e-06, "loss": 0.4823, "step": 1261 }, { "epoch": 0.5315176189807665, "grad_norm": 0.5457261204719543, "learning_rate": 9.820037790517427e-06, "loss": 0.5243, "step": 1262 }, { "epoch": 0.5319387898357434, "grad_norm": 0.5587576031684875, "learning_rate": 9.819385572806598e-06, "loss": 0.4968, "step": 1263 }, { "epoch": 0.5323599606907202, "grad_norm": 0.5034623742103577, "learning_rate": 9.81873219708962e-06, "loss": 0.5243, "step": 1264 }, { "epoch": 0.5327811315456971, "grad_norm": 0.5155813097953796, "learning_rate": 9.818077663523482e-06, "loss": 0.5169, "step": 1265 }, { "epoch": 0.5332023024006739, "grad_norm": 0.6063551306724548, "learning_rate": 9.817421972265457e-06, "loss": 0.5207, "step": 1266 }, { "epoch": 0.5336234732556507, "grad_norm": 0.5358915328979492, "learning_rate": 9.816765123473097e-06, "loss": 0.4804, "step": 1267 }, { "epoch": 0.5340446441106276, "grad_norm": 0.508541464805603, "learning_rate": 9.816107117304227e-06, "loss": 0.5324, "step": 1268 }, { "epoch": 0.5344658149656044, "grad_norm": 0.5416736602783203, "learning_rate": 9.815447953916955e-06, "loss": 0.4458, "step": 1269 }, { "epoch": 0.5348869858205813, "grad_norm": 0.6344587206840515, "learning_rate": 9.814787633469665e-06, "loss": 0.5348, "step": 1270 }, { "epoch": 0.5353081566755581, "grad_norm": 0.5802713632583618, "learning_rate": 9.81412615612102e-06, "loss": 0.5238, "step": 1271 }, { "epoch": 0.5357293275305349, "grad_norm": 0.6037414073944092, "learning_rate": 9.813463522029958e-06, "loss": 0.5165, "step": 1272 }, { "epoch": 0.5361504983855118, "grad_norm": 0.5082756876945496, "learning_rate": 9.812799731355702e-06, "loss": 0.5024, "step": 1273 }, { "epoch": 0.5365716692404886, "grad_norm": 0.5622609257698059, "learning_rate": 9.812134784257743e-06, "loss": 0.4785, "step": 1274 }, { "epoch": 0.5369928400954654, "grad_norm": 0.49407655000686646, "learning_rate": 9.81146868089586e-06, "loss": 0.4766, "step": 1275 }, { "epoch": 0.5374140109504423, "grad_norm": 0.5106059908866882, "learning_rate": 9.810801421430098e-06, "loss": 0.522, "step": 1276 }, { "epoch": 0.5378351818054191, "grad_norm": 0.5363186597824097, "learning_rate": 9.810133006020795e-06, "loss": 0.508, "step": 1277 }, { "epoch": 0.538256352660396, "grad_norm": 0.506551206111908, "learning_rate": 9.809463434828552e-06, "loss": 0.5293, "step": 1278 }, { "epoch": 0.5386775235153728, "grad_norm": 0.500739336013794, "learning_rate": 9.80879270801426e-06, "loss": 0.4831, "step": 1279 }, { "epoch": 0.5390986943703495, "grad_norm": 0.4816391170024872, "learning_rate": 9.808120825739074e-06, "loss": 0.4902, "step": 1280 }, { "epoch": 0.5395198652253264, "grad_norm": 0.5042732954025269, "learning_rate": 9.807447788164441e-06, "loss": 0.4851, "step": 1281 }, { "epoch": 0.5399410360803032, "grad_norm": 0.5822870135307312, "learning_rate": 9.806773595452077e-06, "loss": 0.4775, "step": 1282 }, { "epoch": 0.54036220693528, "grad_norm": 0.4873739182949066, "learning_rate": 9.806098247763978e-06, "loss": 0.4711, "step": 1283 }, { "epoch": 0.5407833777902569, "grad_norm": 0.5301643013954163, "learning_rate": 9.805421745262415e-06, "loss": 0.4906, "step": 1284 }, { "epoch": 0.5412045486452337, "grad_norm": 0.5228998064994812, "learning_rate": 9.804744088109943e-06, "loss": 0.4764, "step": 1285 }, { "epoch": 0.5416257195002105, "grad_norm": 0.480650395154953, "learning_rate": 9.804065276469386e-06, "loss": 0.5051, "step": 1286 }, { "epoch": 0.5420468903551874, "grad_norm": 0.5352036952972412, "learning_rate": 9.80338531050385e-06, "loss": 0.4768, "step": 1287 }, { "epoch": 0.5424680612101642, "grad_norm": 0.48988693952560425, "learning_rate": 9.802704190376719e-06, "loss": 0.4812, "step": 1288 }, { "epoch": 0.5428892320651411, "grad_norm": 0.49473971128463745, "learning_rate": 9.802021916251652e-06, "loss": 0.4855, "step": 1289 }, { "epoch": 0.5433104029201179, "grad_norm": 0.4902283251285553, "learning_rate": 9.801338488292588e-06, "loss": 0.4933, "step": 1290 }, { "epoch": 0.5437315737750947, "grad_norm": 0.5302894711494446, "learning_rate": 9.80065390666374e-06, "loss": 0.4957, "step": 1291 }, { "epoch": 0.5441527446300716, "grad_norm": 0.585024893283844, "learning_rate": 9.799968171529601e-06, "loss": 0.4847, "step": 1292 }, { "epoch": 0.5445739154850484, "grad_norm": 0.4996405839920044, "learning_rate": 9.79928128305494e-06, "loss": 0.5111, "step": 1293 }, { "epoch": 0.5449950863400252, "grad_norm": 0.4555697739124298, "learning_rate": 9.798593241404804e-06, "loss": 0.484, "step": 1294 }, { "epoch": 0.5454162571950021, "grad_norm": 0.5805695056915283, "learning_rate": 9.797904046744513e-06, "loss": 0.4798, "step": 1295 }, { "epoch": 0.5458374280499789, "grad_norm": 0.49669331312179565, "learning_rate": 9.797213699239671e-06, "loss": 0.4705, "step": 1296 }, { "epoch": 0.5462585989049558, "grad_norm": 0.5355783104896545, "learning_rate": 9.796522199056153e-06, "loss": 0.5139, "step": 1297 }, { "epoch": 0.5466797697599326, "grad_norm": 0.48463135957717896, "learning_rate": 9.795829546360113e-06, "loss": 0.475, "step": 1298 }, { "epoch": 0.5471009406149094, "grad_norm": 0.4362669587135315, "learning_rate": 9.795135741317985e-06, "loss": 0.494, "step": 1299 }, { "epoch": 0.5475221114698863, "grad_norm": 0.5151882767677307, "learning_rate": 9.794440784096475e-06, "loss": 0.5128, "step": 1300 }, { "epoch": 0.5479432823248631, "grad_norm": 0.4861695468425751, "learning_rate": 9.793744674862567e-06, "loss": 0.4557, "step": 1301 }, { "epoch": 0.54836445317984, "grad_norm": 0.46176302433013916, "learning_rate": 9.793047413783526e-06, "loss": 0.4851, "step": 1302 }, { "epoch": 0.5487856240348168, "grad_norm": 0.5227203369140625, "learning_rate": 9.792349001026885e-06, "loss": 0.5064, "step": 1303 }, { "epoch": 0.5492067948897936, "grad_norm": 0.47169920802116394, "learning_rate": 9.791649436760464e-06, "loss": 0.4815, "step": 1304 }, { "epoch": 0.5496279657447705, "grad_norm": 0.49391108751296997, "learning_rate": 9.790948721152355e-06, "loss": 0.4896, "step": 1305 }, { "epoch": 0.5500491365997473, "grad_norm": 0.495656818151474, "learning_rate": 9.790246854370923e-06, "loss": 0.5162, "step": 1306 }, { "epoch": 0.5504703074547241, "grad_norm": 0.5187172889709473, "learning_rate": 9.789543836584815e-06, "loss": 0.4987, "step": 1307 }, { "epoch": 0.550891478309701, "grad_norm": 0.5226044654846191, "learning_rate": 9.788839667962956e-06, "loss": 0.5204, "step": 1308 }, { "epoch": 0.5513126491646778, "grad_norm": 0.47409844398498535, "learning_rate": 9.788134348674538e-06, "loss": 0.4925, "step": 1309 }, { "epoch": 0.5517338200196547, "grad_norm": 0.43465301394462585, "learning_rate": 9.787427878889043e-06, "loss": 0.4849, "step": 1310 }, { "epoch": 0.5521549908746315, "grad_norm": 0.5575310587882996, "learning_rate": 9.786720258776214e-06, "loss": 0.5126, "step": 1311 }, { "epoch": 0.5525761617296083, "grad_norm": 0.4581676721572876, "learning_rate": 9.786011488506084e-06, "loss": 0.5033, "step": 1312 }, { "epoch": 0.5529973325845852, "grad_norm": 0.46690961718559265, "learning_rate": 9.785301568248955e-06, "loss": 0.4712, "step": 1313 }, { "epoch": 0.553418503439562, "grad_norm": 0.49383115768432617, "learning_rate": 9.78459049817541e-06, "loss": 0.4982, "step": 1314 }, { "epoch": 0.5538396742945388, "grad_norm": 0.46870923042297363, "learning_rate": 9.7838782784563e-06, "loss": 0.4651, "step": 1315 }, { "epoch": 0.5542608451495157, "grad_norm": 0.5035197138786316, "learning_rate": 9.783164909262765e-06, "loss": 0.5068, "step": 1316 }, { "epoch": 0.5546820160044925, "grad_norm": 0.46613815426826477, "learning_rate": 9.782450390766207e-06, "loss": 0.4564, "step": 1317 }, { "epoch": 0.5551031868594694, "grad_norm": 0.4456237554550171, "learning_rate": 9.781734723138317e-06, "loss": 0.4951, "step": 1318 }, { "epoch": 0.5555243577144462, "grad_norm": 0.4716895520687103, "learning_rate": 9.781017906551052e-06, "loss": 0.4931, "step": 1319 }, { "epoch": 0.555945528569423, "grad_norm": 0.5266977548599243, "learning_rate": 9.78029994117665e-06, "loss": 0.4796, "step": 1320 }, { "epoch": 0.5563666994243999, "grad_norm": 0.45873376727104187, "learning_rate": 9.779580827187627e-06, "loss": 0.49, "step": 1321 }, { "epoch": 0.5567878702793767, "grad_norm": 0.502103328704834, "learning_rate": 9.778860564756769e-06, "loss": 0.5135, "step": 1322 }, { "epoch": 0.5572090411343535, "grad_norm": 0.5153399109840393, "learning_rate": 9.778139154057143e-06, "loss": 0.4867, "step": 1323 }, { "epoch": 0.5576302119893304, "grad_norm": 0.44783878326416016, "learning_rate": 9.777416595262091e-06, "loss": 0.4484, "step": 1324 }, { "epoch": 0.5580513828443072, "grad_norm": 0.4986724257469177, "learning_rate": 9.77669288854523e-06, "loss": 0.4863, "step": 1325 }, { "epoch": 0.5584725536992841, "grad_norm": 0.48942720890045166, "learning_rate": 9.77596803408045e-06, "loss": 0.5169, "step": 1326 }, { "epoch": 0.5588937245542609, "grad_norm": 0.48681074380874634, "learning_rate": 9.77524203204192e-06, "loss": 0.504, "step": 1327 }, { "epoch": 0.5593148954092377, "grad_norm": 0.4964390695095062, "learning_rate": 9.774514882604088e-06, "loss": 0.4918, "step": 1328 }, { "epoch": 0.5597360662642146, "grad_norm": 0.5501236319541931, "learning_rate": 9.773786585941673e-06, "loss": 0.5175, "step": 1329 }, { "epoch": 0.5601572371191913, "grad_norm": 0.4646945595741272, "learning_rate": 9.77305714222967e-06, "loss": 0.4977, "step": 1330 }, { "epoch": 0.5605784079741681, "grad_norm": 0.4786417782306671, "learning_rate": 9.772326551643347e-06, "loss": 0.4929, "step": 1331 }, { "epoch": 0.560999578829145, "grad_norm": 0.477287232875824, "learning_rate": 9.771594814358257e-06, "loss": 0.4893, "step": 1332 }, { "epoch": 0.5614207496841218, "grad_norm": 0.5024509429931641, "learning_rate": 9.77086193055022e-06, "loss": 0.4682, "step": 1333 }, { "epoch": 0.5618419205390986, "grad_norm": 0.4818061590194702, "learning_rate": 9.77012790039533e-06, "loss": 0.4848, "step": 1334 }, { "epoch": 0.5622630913940755, "grad_norm": 0.5574294924736023, "learning_rate": 9.769392724069969e-06, "loss": 0.5087, "step": 1335 }, { "epoch": 0.5626842622490523, "grad_norm": 0.5422350764274597, "learning_rate": 9.768656401750776e-06, "loss": 0.5073, "step": 1336 }, { "epoch": 0.5631054331040292, "grad_norm": 0.48551827669143677, "learning_rate": 9.767918933614683e-06, "loss": 0.4782, "step": 1337 }, { "epoch": 0.563526603959006, "grad_norm": 0.5062230825424194, "learning_rate": 9.767180319838884e-06, "loss": 0.4686, "step": 1338 }, { "epoch": 0.5639477748139828, "grad_norm": 0.5234948396682739, "learning_rate": 9.766440560600858e-06, "loss": 0.5168, "step": 1339 }, { "epoch": 0.5643689456689597, "grad_norm": 0.5344440937042236, "learning_rate": 9.765699656078354e-06, "loss": 0.4928, "step": 1340 }, { "epoch": 0.5647901165239365, "grad_norm": 0.5455300211906433, "learning_rate": 9.764957606449394e-06, "loss": 0.5247, "step": 1341 }, { "epoch": 0.5652112873789134, "grad_norm": 0.5082892179489136, "learning_rate": 9.764214411892283e-06, "loss": 0.5303, "step": 1342 }, { "epoch": 0.5656324582338902, "grad_norm": 0.4897652864456177, "learning_rate": 9.763470072585592e-06, "loss": 0.4609, "step": 1343 }, { "epoch": 0.566053629088867, "grad_norm": 0.6200530529022217, "learning_rate": 9.762724588708174e-06, "loss": 0.5029, "step": 1344 }, { "epoch": 0.5664747999438439, "grad_norm": 0.5058210492134094, "learning_rate": 9.761977960439156e-06, "loss": 0.49, "step": 1345 }, { "epoch": 0.5668959707988207, "grad_norm": 0.47281011939048767, "learning_rate": 9.761230187957935e-06, "loss": 0.4887, "step": 1346 }, { "epoch": 0.5673171416537975, "grad_norm": 0.5858778357505798, "learning_rate": 9.760481271444189e-06, "loss": 0.5069, "step": 1347 }, { "epoch": 0.5677383125087744, "grad_norm": 0.5180103778839111, "learning_rate": 9.759731211077867e-06, "loss": 0.4877, "step": 1348 }, { "epoch": 0.5681594833637512, "grad_norm": 0.49284136295318604, "learning_rate": 9.758980007039194e-06, "loss": 0.4999, "step": 1349 }, { "epoch": 0.568580654218728, "grad_norm": 0.47972002625465393, "learning_rate": 9.75822765950867e-06, "loss": 0.4957, "step": 1350 }, { "epoch": 0.5690018250737049, "grad_norm": 0.5134989023208618, "learning_rate": 9.757474168667072e-06, "loss": 0.4855, "step": 1351 }, { "epoch": 0.5694229959286817, "grad_norm": 0.556139349937439, "learning_rate": 9.756719534695446e-06, "loss": 0.5446, "step": 1352 }, { "epoch": 0.5698441667836586, "grad_norm": 0.528734028339386, "learning_rate": 9.755963757775117e-06, "loss": 0.5035, "step": 1353 }, { "epoch": 0.5702653376386354, "grad_norm": 0.5330990552902222, "learning_rate": 9.755206838087687e-06, "loss": 0.4984, "step": 1354 }, { "epoch": 0.5706865084936122, "grad_norm": 0.5012854933738708, "learning_rate": 9.754448775815026e-06, "loss": 0.4935, "step": 1355 }, { "epoch": 0.5711076793485891, "grad_norm": 0.592531681060791, "learning_rate": 9.753689571139283e-06, "loss": 0.5081, "step": 1356 }, { "epoch": 0.5715288502035659, "grad_norm": 0.5889626145362854, "learning_rate": 9.752929224242878e-06, "loss": 0.5073, "step": 1357 }, { "epoch": 0.5719500210585428, "grad_norm": 0.5079895257949829, "learning_rate": 9.752167735308511e-06, "loss": 0.4948, "step": 1358 }, { "epoch": 0.5723711919135196, "grad_norm": 0.5591831803321838, "learning_rate": 9.751405104519151e-06, "loss": 0.4884, "step": 1359 }, { "epoch": 0.5727923627684964, "grad_norm": 0.456264466047287, "learning_rate": 9.750641332058045e-06, "loss": 0.4983, "step": 1360 }, { "epoch": 0.5732135336234733, "grad_norm": 0.5047061443328857, "learning_rate": 9.749876418108714e-06, "loss": 0.4799, "step": 1361 }, { "epoch": 0.5736347044784501, "grad_norm": 0.5146756172180176, "learning_rate": 9.74911036285495e-06, "loss": 0.5042, "step": 1362 }, { "epoch": 0.574055875333427, "grad_norm": 0.5241327285766602, "learning_rate": 9.748343166480823e-06, "loss": 0.4963, "step": 1363 }, { "epoch": 0.5744770461884038, "grad_norm": 0.48845309019088745, "learning_rate": 9.747574829170674e-06, "loss": 0.5155, "step": 1364 }, { "epoch": 0.5748982170433806, "grad_norm": 0.5042930841445923, "learning_rate": 9.74680535110912e-06, "loss": 0.4792, "step": 1365 }, { "epoch": 0.5753193878983575, "grad_norm": 0.5400742292404175, "learning_rate": 9.746034732481053e-06, "loss": 0.5249, "step": 1366 }, { "epoch": 0.5757405587533343, "grad_norm": 0.4817892014980316, "learning_rate": 9.745262973471638e-06, "loss": 0.4916, "step": 1367 }, { "epoch": 0.5761617296083111, "grad_norm": 0.5053383708000183, "learning_rate": 9.744490074266311e-06, "loss": 0.5346, "step": 1368 }, { "epoch": 0.576582900463288, "grad_norm": 0.4886895716190338, "learning_rate": 9.74371603505079e-06, "loss": 0.4892, "step": 1369 }, { "epoch": 0.5770040713182648, "grad_norm": 0.47363314032554626, "learning_rate": 9.742940856011057e-06, "loss": 0.4751, "step": 1370 }, { "epoch": 0.5774252421732416, "grad_norm": 0.42163926362991333, "learning_rate": 9.742164537333374e-06, "loss": 0.4433, "step": 1371 }, { "epoch": 0.5778464130282185, "grad_norm": 0.5524975657463074, "learning_rate": 9.741387079204278e-06, "loss": 0.524, "step": 1372 }, { "epoch": 0.5782675838831953, "grad_norm": 0.5049740076065063, "learning_rate": 9.740608481810573e-06, "loss": 0.5092, "step": 1373 }, { "epoch": 0.5786887547381722, "grad_norm": 0.4708840548992157, "learning_rate": 9.739828745339347e-06, "loss": 0.4652, "step": 1374 }, { "epoch": 0.579109925593149, "grad_norm": 0.5274085402488708, "learning_rate": 9.73904786997795e-06, "loss": 0.4515, "step": 1375 }, { "epoch": 0.5795310964481258, "grad_norm": 0.5332657098770142, "learning_rate": 9.738265855914014e-06, "loss": 0.487, "step": 1376 }, { "epoch": 0.5799522673031027, "grad_norm": 0.5117377638816833, "learning_rate": 9.737482703335441e-06, "loss": 0.4562, "step": 1377 }, { "epoch": 0.5803734381580795, "grad_norm": 0.5415506958961487, "learning_rate": 9.736698412430409e-06, "loss": 0.5537, "step": 1378 }, { "epoch": 0.5807946090130564, "grad_norm": 0.4158678352832794, "learning_rate": 9.735912983387368e-06, "loss": 0.4319, "step": 1379 }, { "epoch": 0.5812157798680331, "grad_norm": 0.4699445962905884, "learning_rate": 9.73512641639504e-06, "loss": 0.4721, "step": 1380 }, { "epoch": 0.5816369507230099, "grad_norm": 0.5107735991477966, "learning_rate": 9.734338711642423e-06, "loss": 0.4795, "step": 1381 }, { "epoch": 0.5820581215779868, "grad_norm": 0.5167118310928345, "learning_rate": 9.733549869318787e-06, "loss": 0.5228, "step": 1382 }, { "epoch": 0.5824792924329636, "grad_norm": 0.5066666603088379, "learning_rate": 9.732759889613677e-06, "loss": 0.4757, "step": 1383 }, { "epoch": 0.5829004632879404, "grad_norm": 0.5499109625816345, "learning_rate": 9.73196877271691e-06, "loss": 0.5024, "step": 1384 }, { "epoch": 0.5833216341429173, "grad_norm": 0.5454306602478027, "learning_rate": 9.731176518818573e-06, "loss": 0.5213, "step": 1385 }, { "epoch": 0.5837428049978941, "grad_norm": 0.45278412103652954, "learning_rate": 9.730383128109032e-06, "loss": 0.4699, "step": 1386 }, { "epoch": 0.5841639758528709, "grad_norm": 0.5276970863342285, "learning_rate": 9.729588600778924e-06, "loss": 0.4728, "step": 1387 }, { "epoch": 0.5845851467078478, "grad_norm": 0.5639858245849609, "learning_rate": 9.728792937019158e-06, "loss": 0.501, "step": 1388 }, { "epoch": 0.5850063175628246, "grad_norm": 0.4874800741672516, "learning_rate": 9.727996137020918e-06, "loss": 0.4522, "step": 1389 }, { "epoch": 0.5854274884178015, "grad_norm": 0.48857781291007996, "learning_rate": 9.727198200975658e-06, "loss": 0.4814, "step": 1390 }, { "epoch": 0.5858486592727783, "grad_norm": 0.4719027578830719, "learning_rate": 9.726399129075107e-06, "loss": 0.4838, "step": 1391 }, { "epoch": 0.5862698301277551, "grad_norm": 0.5737476348876953, "learning_rate": 9.725598921511269e-06, "loss": 0.533, "step": 1392 }, { "epoch": 0.586691000982732, "grad_norm": 0.5269880294799805, "learning_rate": 9.724797578476414e-06, "loss": 0.5118, "step": 1393 }, { "epoch": 0.5871121718377088, "grad_norm": 0.5092182159423828, "learning_rate": 9.723995100163093e-06, "loss": 0.4656, "step": 1394 }, { "epoch": 0.5875333426926856, "grad_norm": 0.4896168112754822, "learning_rate": 9.723191486764125e-06, "loss": 0.5022, "step": 1395 }, { "epoch": 0.5879545135476625, "grad_norm": 0.5233773589134216, "learning_rate": 9.722386738472604e-06, "loss": 0.4884, "step": 1396 }, { "epoch": 0.5883756844026393, "grad_norm": 0.49424365162849426, "learning_rate": 9.721580855481895e-06, "loss": 0.4987, "step": 1397 }, { "epoch": 0.5887968552576162, "grad_norm": 0.5358529686927795, "learning_rate": 9.720773837985636e-06, "loss": 0.493, "step": 1398 }, { "epoch": 0.589218026112593, "grad_norm": 0.4623827636241913, "learning_rate": 9.719965686177738e-06, "loss": 0.4907, "step": 1399 }, { "epoch": 0.5896391969675698, "grad_norm": 0.4725036323070526, "learning_rate": 9.719156400252383e-06, "loss": 0.4769, "step": 1400 }, { "epoch": 0.5900603678225467, "grad_norm": 0.49758049845695496, "learning_rate": 9.718345980404029e-06, "loss": 0.4908, "step": 1401 }, { "epoch": 0.5904815386775235, "grad_norm": 0.5157256722450256, "learning_rate": 9.717534426827405e-06, "loss": 0.509, "step": 1402 }, { "epoch": 0.5909027095325003, "grad_norm": 0.4570724368095398, "learning_rate": 9.716721739717509e-06, "loss": 0.4611, "step": 1403 }, { "epoch": 0.5913238803874772, "grad_norm": 0.534808874130249, "learning_rate": 9.715907919269617e-06, "loss": 0.4936, "step": 1404 }, { "epoch": 0.591745051242454, "grad_norm": 0.4599277377128601, "learning_rate": 9.71509296567927e-06, "loss": 0.4939, "step": 1405 }, { "epoch": 0.5921662220974309, "grad_norm": 0.5515491962432861, "learning_rate": 9.714276879142293e-06, "loss": 0.5291, "step": 1406 }, { "epoch": 0.5925873929524077, "grad_norm": 0.45785659551620483, "learning_rate": 9.713459659854769e-06, "loss": 0.4846, "step": 1407 }, { "epoch": 0.5930085638073845, "grad_norm": 0.4474617838859558, "learning_rate": 9.712641308013065e-06, "loss": 0.4947, "step": 1408 }, { "epoch": 0.5934297346623614, "grad_norm": 0.5442335605621338, "learning_rate": 9.711821823813812e-06, "loss": 0.5093, "step": 1409 }, { "epoch": 0.5938509055173382, "grad_norm": 0.4916386902332306, "learning_rate": 9.711001207453919e-06, "loss": 0.4839, "step": 1410 }, { "epoch": 0.594272076372315, "grad_norm": 0.42885857820510864, "learning_rate": 9.710179459130562e-06, "loss": 0.4629, "step": 1411 }, { "epoch": 0.5946932472272919, "grad_norm": 0.47237905859947205, "learning_rate": 9.709356579041191e-06, "loss": 0.4956, "step": 1412 }, { "epoch": 0.5951144180822687, "grad_norm": 0.5010643005371094, "learning_rate": 9.708532567383534e-06, "loss": 0.5351, "step": 1413 }, { "epoch": 0.5955355889372456, "grad_norm": 0.47644174098968506, "learning_rate": 9.707707424355578e-06, "loss": 0.4952, "step": 1414 }, { "epoch": 0.5959567597922224, "grad_norm": 0.5095376968383789, "learning_rate": 9.706881150155592e-06, "loss": 0.5109, "step": 1415 }, { "epoch": 0.5963779306471992, "grad_norm": 0.4611484408378601, "learning_rate": 9.706053744982116e-06, "loss": 0.4993, "step": 1416 }, { "epoch": 0.5967991015021761, "grad_norm": 0.4616256654262543, "learning_rate": 9.705225209033957e-06, "loss": 0.5039, "step": 1417 }, { "epoch": 0.5972202723571529, "grad_norm": 0.49303701519966125, "learning_rate": 9.704395542510196e-06, "loss": 0.4413, "step": 1418 }, { "epoch": 0.5976414432121298, "grad_norm": 0.45468318462371826, "learning_rate": 9.703564745610187e-06, "loss": 0.519, "step": 1419 }, { "epoch": 0.5980626140671066, "grad_norm": 0.4426930844783783, "learning_rate": 9.702732818533555e-06, "loss": 0.4399, "step": 1420 }, { "epoch": 0.5984837849220834, "grad_norm": 0.4626946449279785, "learning_rate": 9.701899761480197e-06, "loss": 0.4931, "step": 1421 }, { "epoch": 0.5989049557770603, "grad_norm": 0.5138938426971436, "learning_rate": 9.701065574650279e-06, "loss": 0.4734, "step": 1422 }, { "epoch": 0.5993261266320371, "grad_norm": 0.5066606998443604, "learning_rate": 9.70023025824424e-06, "loss": 0.5563, "step": 1423 }, { "epoch": 0.5997472974870139, "grad_norm": 0.5193162560462952, "learning_rate": 9.699393812462792e-06, "loss": 0.4845, "step": 1424 }, { "epoch": 0.6001684683419908, "grad_norm": 0.5074121356010437, "learning_rate": 9.698556237506915e-06, "loss": 0.4803, "step": 1425 }, { "epoch": 0.6005896391969676, "grad_norm": 0.44880950450897217, "learning_rate": 9.697717533577863e-06, "loss": 0.5056, "step": 1426 }, { "epoch": 0.6010108100519445, "grad_norm": 0.563107967376709, "learning_rate": 9.696877700877162e-06, "loss": 0.5225, "step": 1427 }, { "epoch": 0.6014319809069213, "grad_norm": 0.7580299973487854, "learning_rate": 9.696036739606606e-06, "loss": 0.5928, "step": 1428 }, { "epoch": 0.6018531517618981, "grad_norm": 0.5237016081809998, "learning_rate": 9.695194649968262e-06, "loss": 0.4958, "step": 1429 }, { "epoch": 0.6022743226168749, "grad_norm": 0.517253041267395, "learning_rate": 9.69435143216447e-06, "loss": 0.4913, "step": 1430 }, { "epoch": 0.6026954934718517, "grad_norm": 0.5554940700531006, "learning_rate": 9.693507086397837e-06, "loss": 0.477, "step": 1431 }, { "epoch": 0.6031166643268285, "grad_norm": 0.4959139823913574, "learning_rate": 9.692661612871242e-06, "loss": 0.5323, "step": 1432 }, { "epoch": 0.6035378351818054, "grad_norm": 0.4613363742828369, "learning_rate": 9.69181501178784e-06, "loss": 0.4507, "step": 1433 }, { "epoch": 0.6039590060367822, "grad_norm": 0.47828489542007446, "learning_rate": 9.690967283351051e-06, "loss": 0.4863, "step": 1434 }, { "epoch": 0.604380176891759, "grad_norm": 0.4875170588493347, "learning_rate": 9.690118427764566e-06, "loss": 0.502, "step": 1435 }, { "epoch": 0.6048013477467359, "grad_norm": 0.4464899003505707, "learning_rate": 9.689268445232352e-06, "loss": 0.5067, "step": 1436 }, { "epoch": 0.6052225186017127, "grad_norm": 0.4475775361061096, "learning_rate": 9.68841733595864e-06, "loss": 0.4777, "step": 1437 }, { "epoch": 0.6056436894566896, "grad_norm": 0.44696852564811707, "learning_rate": 9.68756510014794e-06, "loss": 0.4968, "step": 1438 }, { "epoch": 0.6060648603116664, "grad_norm": 0.48363134264945984, "learning_rate": 9.686711738005023e-06, "loss": 0.4739, "step": 1439 }, { "epoch": 0.6064860311666432, "grad_norm": 0.4576132595539093, "learning_rate": 9.685857249734941e-06, "loss": 0.4904, "step": 1440 }, { "epoch": 0.6069072020216201, "grad_norm": 0.4604280889034271, "learning_rate": 9.685001635543005e-06, "loss": 0.4894, "step": 1441 }, { "epoch": 0.6073283728765969, "grad_norm": 0.4817074239253998, "learning_rate": 9.684144895634808e-06, "loss": 0.494, "step": 1442 }, { "epoch": 0.6077495437315737, "grad_norm": 0.4849626421928406, "learning_rate": 9.683287030216205e-06, "loss": 0.5107, "step": 1443 }, { "epoch": 0.6081707145865506, "grad_norm": 0.537810742855072, "learning_rate": 9.682428039493325e-06, "loss": 0.5196, "step": 1444 }, { "epoch": 0.6085918854415274, "grad_norm": 0.5159298181533813, "learning_rate": 9.68156792367257e-06, "loss": 0.4956, "step": 1445 }, { "epoch": 0.6090130562965043, "grad_norm": 0.5478789210319519, "learning_rate": 9.680706682960605e-06, "loss": 0.482, "step": 1446 }, { "epoch": 0.6094342271514811, "grad_norm": 0.46011799573898315, "learning_rate": 9.679844317564374e-06, "loss": 0.4851, "step": 1447 }, { "epoch": 0.6098553980064579, "grad_norm": 0.45323875546455383, "learning_rate": 9.678980827691082e-06, "loss": 0.4928, "step": 1448 }, { "epoch": 0.6102765688614348, "grad_norm": 0.5164022445678711, "learning_rate": 9.678116213548214e-06, "loss": 0.489, "step": 1449 }, { "epoch": 0.6106977397164116, "grad_norm": 0.47115451097488403, "learning_rate": 9.677250475343516e-06, "loss": 0.4796, "step": 1450 }, { "epoch": 0.6111189105713885, "grad_norm": 0.5112099647521973, "learning_rate": 9.676383613285011e-06, "loss": 0.5138, "step": 1451 }, { "epoch": 0.6115400814263653, "grad_norm": 0.49574047327041626, "learning_rate": 9.67551562758099e-06, "loss": 0.4924, "step": 1452 }, { "epoch": 0.6119612522813421, "grad_norm": 0.43058714270591736, "learning_rate": 9.674646518440012e-06, "loss": 0.4964, "step": 1453 }, { "epoch": 0.612382423136319, "grad_norm": 0.5475805997848511, "learning_rate": 9.673776286070905e-06, "loss": 0.4835, "step": 1454 }, { "epoch": 0.6128035939912958, "grad_norm": 0.5280346870422363, "learning_rate": 9.672904930682773e-06, "loss": 0.5002, "step": 1455 }, { "epoch": 0.6132247648462726, "grad_norm": 0.546582818031311, "learning_rate": 9.672032452484986e-06, "loss": 0.5004, "step": 1456 }, { "epoch": 0.6136459357012495, "grad_norm": 0.4924919307231903, "learning_rate": 9.671158851687181e-06, "loss": 0.4716, "step": 1457 }, { "epoch": 0.6140671065562263, "grad_norm": 0.4987263083457947, "learning_rate": 9.67028412849927e-06, "loss": 0.4882, "step": 1458 }, { "epoch": 0.6144882774112032, "grad_norm": 0.5674735307693481, "learning_rate": 9.66940828313143e-06, "loss": 0.5331, "step": 1459 }, { "epoch": 0.61490944826618, "grad_norm": 0.5603139400482178, "learning_rate": 9.668531315794112e-06, "loss": 0.5268, "step": 1460 }, { "epoch": 0.6153306191211568, "grad_norm": 0.5553701519966125, "learning_rate": 9.667653226698033e-06, "loss": 0.4917, "step": 1461 }, { "epoch": 0.6157517899761337, "grad_norm": 0.49197617173194885, "learning_rate": 9.66677401605418e-06, "loss": 0.5, "step": 1462 }, { "epoch": 0.6161729608311105, "grad_norm": 0.5234770178794861, "learning_rate": 9.665893684073814e-06, "loss": 0.4703, "step": 1463 }, { "epoch": 0.6165941316860873, "grad_norm": 0.5075964331626892, "learning_rate": 9.66501223096846e-06, "loss": 0.5326, "step": 1464 }, { "epoch": 0.6170153025410642, "grad_norm": 0.4445168375968933, "learning_rate": 9.664129656949915e-06, "loss": 0.4425, "step": 1465 }, { "epoch": 0.617436473396041, "grad_norm": 0.5673466324806213, "learning_rate": 9.663245962230244e-06, "loss": 0.5024, "step": 1466 }, { "epoch": 0.6178576442510179, "grad_norm": 0.4964029788970947, "learning_rate": 9.66236114702178e-06, "loss": 0.4678, "step": 1467 }, { "epoch": 0.6182788151059947, "grad_norm": 0.46163812279701233, "learning_rate": 9.66147521153713e-06, "loss": 0.4982, "step": 1468 }, { "epoch": 0.6186999859609715, "grad_norm": 0.46157917380332947, "learning_rate": 9.660588155989167e-06, "loss": 0.4771, "step": 1469 }, { "epoch": 0.6191211568159484, "grad_norm": 0.506488025188446, "learning_rate": 9.659699980591031e-06, "loss": 0.4979, "step": 1470 }, { "epoch": 0.6195423276709252, "grad_norm": 0.5126873850822449, "learning_rate": 9.658810685556138e-06, "loss": 0.5209, "step": 1471 }, { "epoch": 0.619963498525902, "grad_norm": 0.5389443039894104, "learning_rate": 9.657920271098164e-06, "loss": 0.4928, "step": 1472 }, { "epoch": 0.6203846693808789, "grad_norm": 0.5172042846679688, "learning_rate": 9.657028737431062e-06, "loss": 0.4687, "step": 1473 }, { "epoch": 0.6208058402358557, "grad_norm": 0.47075873613357544, "learning_rate": 9.65613608476905e-06, "loss": 0.493, "step": 1474 }, { "epoch": 0.6212270110908326, "grad_norm": 0.5332589149475098, "learning_rate": 9.655242313326613e-06, "loss": 0.4847, "step": 1475 }, { "epoch": 0.6216481819458094, "grad_norm": 0.5503120422363281, "learning_rate": 9.654347423318509e-06, "loss": 0.5238, "step": 1476 }, { "epoch": 0.6220693528007862, "grad_norm": 0.5348350405693054, "learning_rate": 9.653451414959763e-06, "loss": 0.4798, "step": 1477 }, { "epoch": 0.6224905236557631, "grad_norm": 0.5294082760810852, "learning_rate": 9.652554288465668e-06, "loss": 0.4828, "step": 1478 }, { "epoch": 0.6229116945107399, "grad_norm": 0.5252948999404907, "learning_rate": 9.651656044051786e-06, "loss": 0.5317, "step": 1479 }, { "epoch": 0.6233328653657166, "grad_norm": 0.5151966214179993, "learning_rate": 9.650756681933948e-06, "loss": 0.4867, "step": 1480 }, { "epoch": 0.6237540362206935, "grad_norm": 0.4968695044517517, "learning_rate": 9.649856202328255e-06, "loss": 0.4974, "step": 1481 }, { "epoch": 0.6241752070756703, "grad_norm": 0.5243732929229736, "learning_rate": 9.648954605451073e-06, "loss": 0.5214, "step": 1482 }, { "epoch": 0.6245963779306472, "grad_norm": 0.5030009746551514, "learning_rate": 9.64805189151904e-06, "loss": 0.5356, "step": 1483 }, { "epoch": 0.625017548785624, "grad_norm": 0.5088675022125244, "learning_rate": 9.647148060749062e-06, "loss": 0.5131, "step": 1484 }, { "epoch": 0.6254387196406008, "grad_norm": 0.532686173915863, "learning_rate": 9.64624311335831e-06, "loss": 0.5449, "step": 1485 }, { "epoch": 0.6258598904955777, "grad_norm": 0.4915114939212799, "learning_rate": 9.645337049564225e-06, "loss": 0.5009, "step": 1486 }, { "epoch": 0.6262810613505545, "grad_norm": 0.505038857460022, "learning_rate": 9.64442986958452e-06, "loss": 0.4989, "step": 1487 }, { "epoch": 0.6267022322055313, "grad_norm": 0.4804774224758148, "learning_rate": 9.643521573637169e-06, "loss": 0.4973, "step": 1488 }, { "epoch": 0.6271234030605082, "grad_norm": 0.5117925405502319, "learning_rate": 9.642612161940422e-06, "loss": 0.4757, "step": 1489 }, { "epoch": 0.627544573915485, "grad_norm": 0.46592259407043457, "learning_rate": 9.64170163471279e-06, "loss": 0.4749, "step": 1490 }, { "epoch": 0.6279657447704619, "grad_norm": 0.4967755973339081, "learning_rate": 9.640789992173057e-06, "loss": 0.4928, "step": 1491 }, { "epoch": 0.6283869156254387, "grad_norm": 0.48536646366119385, "learning_rate": 9.639877234540273e-06, "loss": 0.4583, "step": 1492 }, { "epoch": 0.6288080864804155, "grad_norm": 0.51003098487854, "learning_rate": 9.638963362033756e-06, "loss": 0.4938, "step": 1493 }, { "epoch": 0.6292292573353924, "grad_norm": 0.491066038608551, "learning_rate": 9.638048374873092e-06, "loss": 0.4915, "step": 1494 }, { "epoch": 0.6296504281903692, "grad_norm": 0.45294877886772156, "learning_rate": 9.637132273278135e-06, "loss": 0.5026, "step": 1495 }, { "epoch": 0.630071599045346, "grad_norm": 0.5398502945899963, "learning_rate": 9.636215057469009e-06, "loss": 0.5226, "step": 1496 }, { "epoch": 0.6304927699003229, "grad_norm": 0.5207974910736084, "learning_rate": 9.6352967276661e-06, "loss": 0.5208, "step": 1497 }, { "epoch": 0.6309139407552997, "grad_norm": 0.5027308464050293, "learning_rate": 9.634377284090064e-06, "loss": 0.4887, "step": 1498 }, { "epoch": 0.6313351116102766, "grad_norm": 0.506563127040863, "learning_rate": 9.633456726961829e-06, "loss": 0.5055, "step": 1499 }, { "epoch": 0.6317562824652534, "grad_norm": 0.5169416666030884, "learning_rate": 9.632535056502588e-06, "loss": 0.4593, "step": 1500 }, { "epoch": 0.6321774533202302, "grad_norm": 0.49307480454444885, "learning_rate": 9.631612272933798e-06, "loss": 0.4957, "step": 1501 }, { "epoch": 0.6325986241752071, "grad_norm": 0.5238453149795532, "learning_rate": 9.630688376477188e-06, "loss": 0.4928, "step": 1502 }, { "epoch": 0.6330197950301839, "grad_norm": 0.4645712673664093, "learning_rate": 9.629763367354751e-06, "loss": 0.4749, "step": 1503 }, { "epoch": 0.6334409658851607, "grad_norm": 0.5447698831558228, "learning_rate": 9.628837245788749e-06, "loss": 0.4777, "step": 1504 }, { "epoch": 0.6338621367401376, "grad_norm": 0.5841093063354492, "learning_rate": 9.627910012001714e-06, "loss": 0.4991, "step": 1505 }, { "epoch": 0.6342833075951144, "grad_norm": 0.5202933549880981, "learning_rate": 9.62698166621644e-06, "loss": 0.4946, "step": 1506 }, { "epoch": 0.6347044784500913, "grad_norm": 0.4990841746330261, "learning_rate": 9.626052208655992e-06, "loss": 0.4842, "step": 1507 }, { "epoch": 0.6351256493050681, "grad_norm": 0.6045917272567749, "learning_rate": 9.6251216395437e-06, "loss": 0.5001, "step": 1508 }, { "epoch": 0.6355468201600449, "grad_norm": 0.5058935880661011, "learning_rate": 9.624189959103162e-06, "loss": 0.465, "step": 1509 }, { "epoch": 0.6359679910150218, "grad_norm": 0.5569053292274475, "learning_rate": 9.623257167558244e-06, "loss": 0.5176, "step": 1510 }, { "epoch": 0.6363891618699986, "grad_norm": 0.5257039666175842, "learning_rate": 9.622323265133075e-06, "loss": 0.4748, "step": 1511 }, { "epoch": 0.6368103327249754, "grad_norm": 0.5405491590499878, "learning_rate": 9.62138825205206e-06, "loss": 0.545, "step": 1512 }, { "epoch": 0.6372315035799523, "grad_norm": 0.5144608616828918, "learning_rate": 9.620452128539858e-06, "loss": 0.4321, "step": 1513 }, { "epoch": 0.6376526744349291, "grad_norm": 0.45530471205711365, "learning_rate": 9.619514894821404e-06, "loss": 0.4753, "step": 1514 }, { "epoch": 0.638073845289906, "grad_norm": 0.5239183902740479, "learning_rate": 9.618576551121901e-06, "loss": 0.5114, "step": 1515 }, { "epoch": 0.6384950161448828, "grad_norm": 0.5106247067451477, "learning_rate": 9.617637097666809e-06, "loss": 0.4683, "step": 1516 }, { "epoch": 0.6389161869998596, "grad_norm": 0.47434452176094055, "learning_rate": 9.616696534681863e-06, "loss": 0.4708, "step": 1517 }, { "epoch": 0.6393373578548365, "grad_norm": 0.5492403507232666, "learning_rate": 9.615754862393066e-06, "loss": 0.5014, "step": 1518 }, { "epoch": 0.6397585287098133, "grad_norm": 0.5133352875709534, "learning_rate": 9.614812081026678e-06, "loss": 0.4886, "step": 1519 }, { "epoch": 0.6401796995647902, "grad_norm": 0.5134861469268799, "learning_rate": 9.613868190809235e-06, "loss": 0.5027, "step": 1520 }, { "epoch": 0.640600870419767, "grad_norm": 0.5557296276092529, "learning_rate": 9.612923191967536e-06, "loss": 0.5149, "step": 1521 }, { "epoch": 0.6410220412747438, "grad_norm": 0.5507279634475708, "learning_rate": 9.611977084728644e-06, "loss": 0.4851, "step": 1522 }, { "epoch": 0.6414432121297207, "grad_norm": 0.5034803152084351, "learning_rate": 9.61102986931989e-06, "loss": 0.4743, "step": 1523 }, { "epoch": 0.6418643829846975, "grad_norm": 0.5477747321128845, "learning_rate": 9.610081545968874e-06, "loss": 0.4851, "step": 1524 }, { "epoch": 0.6422855538396743, "grad_norm": 0.5329586267471313, "learning_rate": 9.609132114903458e-06, "loss": 0.4981, "step": 1525 }, { "epoch": 0.6427067246946512, "grad_norm": 0.5028325319290161, "learning_rate": 9.608181576351775e-06, "loss": 0.4889, "step": 1526 }, { "epoch": 0.643127895549628, "grad_norm": 0.5581250786781311, "learning_rate": 9.60722993054222e-06, "loss": 0.5021, "step": 1527 }, { "epoch": 0.6435490664046049, "grad_norm": 0.5128778219223022, "learning_rate": 9.606277177703452e-06, "loss": 0.4889, "step": 1528 }, { "epoch": 0.6439702372595817, "grad_norm": 0.4868183135986328, "learning_rate": 9.605323318064403e-06, "loss": 0.5104, "step": 1529 }, { "epoch": 0.6443914081145584, "grad_norm": 0.4836429953575134, "learning_rate": 9.604368351854266e-06, "loss": 0.4964, "step": 1530 }, { "epoch": 0.6448125789695353, "grad_norm": 0.5003650188446045, "learning_rate": 9.603412279302502e-06, "loss": 0.4924, "step": 1531 }, { "epoch": 0.6452337498245121, "grad_norm": 0.522510826587677, "learning_rate": 9.602455100638837e-06, "loss": 0.477, "step": 1532 }, { "epoch": 0.6456549206794889, "grad_norm": 0.4533410966396332, "learning_rate": 9.60149681609326e-06, "loss": 0.452, "step": 1533 }, { "epoch": 0.6460760915344658, "grad_norm": 0.45994508266448975, "learning_rate": 9.600537425896032e-06, "loss": 0.497, "step": 1534 }, { "epoch": 0.6464972623894426, "grad_norm": 0.5317679643630981, "learning_rate": 9.599576930277673e-06, "loss": 0.5179, "step": 1535 }, { "epoch": 0.6469184332444194, "grad_norm": 0.5180945992469788, "learning_rate": 9.598615329468975e-06, "loss": 0.5375, "step": 1536 }, { "epoch": 0.6473396040993963, "grad_norm": 0.4342954158782959, "learning_rate": 9.597652623700988e-06, "loss": 0.4703, "step": 1537 }, { "epoch": 0.6477607749543731, "grad_norm": 0.5181571245193481, "learning_rate": 9.596688813205038e-06, "loss": 0.5061, "step": 1538 }, { "epoch": 0.64818194580935, "grad_norm": 0.509190022945404, "learning_rate": 9.595723898212704e-06, "loss": 0.5148, "step": 1539 }, { "epoch": 0.6486031166643268, "grad_norm": 0.4861181676387787, "learning_rate": 9.594757878955838e-06, "loss": 0.5232, "step": 1540 }, { "epoch": 0.6490242875193036, "grad_norm": 0.5195705890655518, "learning_rate": 9.59379075566656e-06, "loss": 0.4985, "step": 1541 }, { "epoch": 0.6494454583742805, "grad_norm": 0.5077134370803833, "learning_rate": 9.592822528577246e-06, "loss": 0.4638, "step": 1542 }, { "epoch": 0.6498666292292573, "grad_norm": 0.4682711064815521, "learning_rate": 9.591853197920546e-06, "loss": 0.4946, "step": 1543 }, { "epoch": 0.6502878000842341, "grad_norm": 0.49766257405281067, "learning_rate": 9.59088276392937e-06, "loss": 0.5228, "step": 1544 }, { "epoch": 0.650708970939211, "grad_norm": 0.5287031531333923, "learning_rate": 9.589911226836895e-06, "loss": 0.4847, "step": 1545 }, { "epoch": 0.6511301417941878, "grad_norm": 0.4790147840976715, "learning_rate": 9.588938586876564e-06, "loss": 0.4716, "step": 1546 }, { "epoch": 0.6515513126491647, "grad_norm": 0.4664263129234314, "learning_rate": 9.587964844282081e-06, "loss": 0.5162, "step": 1547 }, { "epoch": 0.6519724835041415, "grad_norm": 0.5028733015060425, "learning_rate": 9.586989999287419e-06, "loss": 0.4669, "step": 1548 }, { "epoch": 0.6523936543591183, "grad_norm": 0.4779368042945862, "learning_rate": 9.586014052126817e-06, "loss": 0.5162, "step": 1549 }, { "epoch": 0.6528148252140952, "grad_norm": 0.44698187708854675, "learning_rate": 9.585037003034773e-06, "loss": 0.478, "step": 1550 }, { "epoch": 0.653235996069072, "grad_norm": 0.4777248799800873, "learning_rate": 9.584058852246056e-06, "loss": 0.5232, "step": 1551 }, { "epoch": 0.6536571669240488, "grad_norm": 0.489510715007782, "learning_rate": 9.583079599995694e-06, "loss": 0.4721, "step": 1552 }, { "epoch": 0.6540783377790257, "grad_norm": 0.48647063970565796, "learning_rate": 9.582099246518985e-06, "loss": 0.4767, "step": 1553 }, { "epoch": 0.6544995086340025, "grad_norm": 0.4602716565132141, "learning_rate": 9.581117792051487e-06, "loss": 0.5095, "step": 1554 }, { "epoch": 0.6549206794889794, "grad_norm": 0.4360570013523102, "learning_rate": 9.580135236829027e-06, "loss": 0.4696, "step": 1555 }, { "epoch": 0.6553418503439562, "grad_norm": 0.5493904948234558, "learning_rate": 9.579151581087693e-06, "loss": 0.4988, "step": 1556 }, { "epoch": 0.655763021198933, "grad_norm": 0.5394995212554932, "learning_rate": 9.57816682506384e-06, "loss": 0.5332, "step": 1557 }, { "epoch": 0.6561841920539099, "grad_norm": 0.4549788534641266, "learning_rate": 9.577180968994081e-06, "loss": 0.4611, "step": 1558 }, { "epoch": 0.6566053629088867, "grad_norm": 0.487972617149353, "learning_rate": 9.576194013115308e-06, "loss": 0.4864, "step": 1559 }, { "epoch": 0.6570265337638636, "grad_norm": 0.5462680459022522, "learning_rate": 9.575205957664657e-06, "loss": 0.4921, "step": 1560 }, { "epoch": 0.6574477046188404, "grad_norm": 0.5064077973365784, "learning_rate": 9.574216802879547e-06, "loss": 0.4759, "step": 1561 }, { "epoch": 0.6578688754738172, "grad_norm": 0.4440362751483917, "learning_rate": 9.573226548997647e-06, "loss": 0.4833, "step": 1562 }, { "epoch": 0.6582900463287941, "grad_norm": 0.47809037566185, "learning_rate": 9.5722351962569e-06, "loss": 0.4875, "step": 1563 }, { "epoch": 0.6587112171837709, "grad_norm": 0.4887414574623108, "learning_rate": 9.57124274489551e-06, "loss": 0.4798, "step": 1564 }, { "epoch": 0.6591323880387477, "grad_norm": 0.4531790018081665, "learning_rate": 9.57024919515194e-06, "loss": 0.4917, "step": 1565 }, { "epoch": 0.6595535588937246, "grad_norm": 0.47235307097435, "learning_rate": 9.569254547264922e-06, "loss": 0.4944, "step": 1566 }, { "epoch": 0.6599747297487014, "grad_norm": 0.47265908122062683, "learning_rate": 9.568258801473452e-06, "loss": 0.5038, "step": 1567 }, { "epoch": 0.6603959006036783, "grad_norm": 0.5295131206512451, "learning_rate": 9.567261958016788e-06, "loss": 0.5121, "step": 1568 }, { "epoch": 0.6608170714586551, "grad_norm": 0.5243860483169556, "learning_rate": 9.566264017134455e-06, "loss": 0.5014, "step": 1569 }, { "epoch": 0.6612382423136319, "grad_norm": 0.45099860429763794, "learning_rate": 9.565264979066235e-06, "loss": 0.4842, "step": 1570 }, { "epoch": 0.6616594131686088, "grad_norm": 0.5109383463859558, "learning_rate": 9.564264844052183e-06, "loss": 0.533, "step": 1571 }, { "epoch": 0.6620805840235856, "grad_norm": 0.4736783802509308, "learning_rate": 9.563263612332605e-06, "loss": 0.4693, "step": 1572 }, { "epoch": 0.6625017548785624, "grad_norm": 0.44382110238075256, "learning_rate": 9.562261284148083e-06, "loss": 0.4393, "step": 1573 }, { "epoch": 0.6629229257335393, "grad_norm": 0.47629088163375854, "learning_rate": 9.561257859739456e-06, "loss": 0.4935, "step": 1574 }, { "epoch": 0.6633440965885161, "grad_norm": 0.5392771363258362, "learning_rate": 9.56025333934783e-06, "loss": 0.5186, "step": 1575 }, { "epoch": 0.663765267443493, "grad_norm": 0.4572782516479492, "learning_rate": 9.559247723214566e-06, "loss": 0.4603, "step": 1576 }, { "epoch": 0.6641864382984698, "grad_norm": 0.4378502368927002, "learning_rate": 9.558241011581302e-06, "loss": 0.4328, "step": 1577 }, { "epoch": 0.6646076091534466, "grad_norm": 0.5533697009086609, "learning_rate": 9.557233204689926e-06, "loss": 0.5066, "step": 1578 }, { "epoch": 0.6650287800084235, "grad_norm": 0.5178079009056091, "learning_rate": 9.556224302782595e-06, "loss": 0.5072, "step": 1579 }, { "epoch": 0.6654499508634003, "grad_norm": 0.46432575583457947, "learning_rate": 9.555214306101732e-06, "loss": 0.5102, "step": 1580 }, { "epoch": 0.665871121718377, "grad_norm": 0.5134593844413757, "learning_rate": 9.554203214890018e-06, "loss": 0.4448, "step": 1581 }, { "epoch": 0.6662922925733539, "grad_norm": 0.43831759691238403, "learning_rate": 9.553191029390399e-06, "loss": 0.4704, "step": 1582 }, { "epoch": 0.6667134634283307, "grad_norm": 0.42907872796058655, "learning_rate": 9.552177749846083e-06, "loss": 0.4773, "step": 1583 }, { "epoch": 0.6671346342833075, "grad_norm": 0.46465471386909485, "learning_rate": 9.551163376500543e-06, "loss": 0.4832, "step": 1584 }, { "epoch": 0.6675558051382844, "grad_norm": 0.42675086855888367, "learning_rate": 9.550147909597513e-06, "loss": 0.4929, "step": 1585 }, { "epoch": 0.6679769759932612, "grad_norm": 0.4596293270587921, "learning_rate": 9.54913134938099e-06, "loss": 0.4756, "step": 1586 }, { "epoch": 0.6683981468482381, "grad_norm": 0.5013664364814758, "learning_rate": 9.548113696095235e-06, "loss": 0.5095, "step": 1587 }, { "epoch": 0.6688193177032149, "grad_norm": 0.5655247569084167, "learning_rate": 9.54709494998477e-06, "loss": 0.5457, "step": 1588 }, { "epoch": 0.6692404885581917, "grad_norm": 0.4396232068538666, "learning_rate": 9.546075111294378e-06, "loss": 0.4552, "step": 1589 }, { "epoch": 0.6696616594131686, "grad_norm": 0.48095571994781494, "learning_rate": 9.545054180269111e-06, "loss": 0.4986, "step": 1590 }, { "epoch": 0.6700828302681454, "grad_norm": 0.49636557698249817, "learning_rate": 9.544032157154276e-06, "loss": 0.4921, "step": 1591 }, { "epoch": 0.6705040011231223, "grad_norm": 0.4257407784461975, "learning_rate": 9.543009042195446e-06, "loss": 0.4686, "step": 1592 }, { "epoch": 0.6709251719780991, "grad_norm": 0.48315104842185974, "learning_rate": 9.541984835638457e-06, "loss": 0.4942, "step": 1593 }, { "epoch": 0.6713463428330759, "grad_norm": 0.5280078053474426, "learning_rate": 9.540959537729407e-06, "loss": 0.5343, "step": 1594 }, { "epoch": 0.6717675136880528, "grad_norm": 0.4428074359893799, "learning_rate": 9.539933148714652e-06, "loss": 0.484, "step": 1595 }, { "epoch": 0.6721886845430296, "grad_norm": 0.5832973718643188, "learning_rate": 9.538905668840818e-06, "loss": 0.517, "step": 1596 }, { "epoch": 0.6726098553980064, "grad_norm": 0.5508847236633301, "learning_rate": 9.537877098354787e-06, "loss": 0.5029, "step": 1597 }, { "epoch": 0.6730310262529833, "grad_norm": 0.5144817233085632, "learning_rate": 9.536847437503702e-06, "loss": 0.4909, "step": 1598 }, { "epoch": 0.6734521971079601, "grad_norm": 0.47960761189460754, "learning_rate": 9.535816686534976e-06, "loss": 0.4789, "step": 1599 }, { "epoch": 0.673873367962937, "grad_norm": 0.5287169218063354, "learning_rate": 9.534784845696274e-06, "loss": 0.462, "step": 1600 }, { "epoch": 0.6742945388179138, "grad_norm": 0.46352604031562805, "learning_rate": 9.53375191523553e-06, "loss": 0.5085, "step": 1601 }, { "epoch": 0.6747157096728906, "grad_norm": 0.47216999530792236, "learning_rate": 9.532717895400937e-06, "loss": 0.4737, "step": 1602 }, { "epoch": 0.6751368805278675, "grad_norm": 0.5035675764083862, "learning_rate": 9.53168278644095e-06, "loss": 0.4719, "step": 1603 }, { "epoch": 0.6755580513828443, "grad_norm": 0.5167648792266846, "learning_rate": 9.530646588604287e-06, "loss": 0.4966, "step": 1604 }, { "epoch": 0.6759792222378211, "grad_norm": 0.48950403928756714, "learning_rate": 9.529609302139924e-06, "loss": 0.4632, "step": 1605 }, { "epoch": 0.676400393092798, "grad_norm": 0.4414892792701721, "learning_rate": 9.528570927297105e-06, "loss": 0.4436, "step": 1606 }, { "epoch": 0.6768215639477748, "grad_norm": 0.5088380575180054, "learning_rate": 9.527531464325328e-06, "loss": 0.4791, "step": 1607 }, { "epoch": 0.6772427348027517, "grad_norm": 0.5027170181274414, "learning_rate": 9.526490913474358e-06, "loss": 0.4985, "step": 1608 }, { "epoch": 0.6776639056577285, "grad_norm": 0.4924210011959076, "learning_rate": 9.525449274994218e-06, "loss": 0.5065, "step": 1609 }, { "epoch": 0.6780850765127053, "grad_norm": 0.4705127477645874, "learning_rate": 9.524406549135195e-06, "loss": 0.5021, "step": 1610 }, { "epoch": 0.6785062473676822, "grad_norm": 0.5004907250404358, "learning_rate": 9.523362736147836e-06, "loss": 0.4693, "step": 1611 }, { "epoch": 0.678927418222659, "grad_norm": 0.49554556608200073, "learning_rate": 9.522317836282949e-06, "loss": 0.5059, "step": 1612 }, { "epoch": 0.6793485890776358, "grad_norm": 0.4802109897136688, "learning_rate": 9.521271849791602e-06, "loss": 0.5226, "step": 1613 }, { "epoch": 0.6797697599326127, "grad_norm": 0.4863351285457611, "learning_rate": 9.52022477692513e-06, "loss": 0.5149, "step": 1614 }, { "epoch": 0.6801909307875895, "grad_norm": 0.48570218682289124, "learning_rate": 9.519176617935119e-06, "loss": 0.4752, "step": 1615 }, { "epoch": 0.6806121016425664, "grad_norm": 0.45990708470344543, "learning_rate": 9.518127373073426e-06, "loss": 0.4815, "step": 1616 }, { "epoch": 0.6810332724975432, "grad_norm": 0.4443465769290924, "learning_rate": 9.51707704259216e-06, "loss": 0.4772, "step": 1617 }, { "epoch": 0.68145444335252, "grad_norm": 0.4609827697277069, "learning_rate": 9.5160256267437e-06, "loss": 0.4789, "step": 1618 }, { "epoch": 0.6818756142074969, "grad_norm": 0.4891040027141571, "learning_rate": 9.514973125780678e-06, "loss": 0.5026, "step": 1619 }, { "epoch": 0.6822967850624737, "grad_norm": 0.4350150525569916, "learning_rate": 9.513919539955993e-06, "loss": 0.4915, "step": 1620 }, { "epoch": 0.6827179559174505, "grad_norm": 0.49085184931755066, "learning_rate": 9.512864869522798e-06, "loss": 0.4775, "step": 1621 }, { "epoch": 0.6831391267724274, "grad_norm": 0.48263540863990784, "learning_rate": 9.51180911473451e-06, "loss": 0.4904, "step": 1622 }, { "epoch": 0.6835602976274042, "grad_norm": 0.5210782885551453, "learning_rate": 9.51075227584481e-06, "loss": 0.5116, "step": 1623 }, { "epoch": 0.6839814684823811, "grad_norm": 0.49459072947502136, "learning_rate": 9.509694353107634e-06, "loss": 0.5023, "step": 1624 }, { "epoch": 0.6844026393373579, "grad_norm": 0.5025594830513, "learning_rate": 9.50863534677718e-06, "loss": 0.5032, "step": 1625 }, { "epoch": 0.6848238101923347, "grad_norm": 0.5280804634094238, "learning_rate": 9.50757525710791e-06, "loss": 0.4803, "step": 1626 }, { "epoch": 0.6852449810473116, "grad_norm": 0.5421614646911621, "learning_rate": 9.50651408435454e-06, "loss": 0.505, "step": 1627 }, { "epoch": 0.6856661519022884, "grad_norm": 0.5381784439086914, "learning_rate": 9.50545182877205e-06, "loss": 0.5124, "step": 1628 }, { "epoch": 0.6860873227572652, "grad_norm": 0.46797627210617065, "learning_rate": 9.504388490615682e-06, "loss": 0.5086, "step": 1629 }, { "epoch": 0.6865084936122421, "grad_norm": 0.5129102468490601, "learning_rate": 9.503324070140933e-06, "loss": 0.5084, "step": 1630 }, { "epoch": 0.6869296644672188, "grad_norm": 0.4539909362792969, "learning_rate": 9.502258567603563e-06, "loss": 0.4818, "step": 1631 }, { "epoch": 0.6873508353221957, "grad_norm": 0.47126930952072144, "learning_rate": 9.501191983259596e-06, "loss": 0.4742, "step": 1632 }, { "epoch": 0.6877720061771725, "grad_norm": 0.4776802957057953, "learning_rate": 9.500124317365307e-06, "loss": 0.5045, "step": 1633 }, { "epoch": 0.6881931770321493, "grad_norm": 0.5414979457855225, "learning_rate": 9.499055570177237e-06, "loss": 0.5314, "step": 1634 }, { "epoch": 0.6886143478871262, "grad_norm": 0.4778410494327545, "learning_rate": 9.497985741952188e-06, "loss": 0.5046, "step": 1635 }, { "epoch": 0.689035518742103, "grad_norm": 0.468129426240921, "learning_rate": 9.496914832947215e-06, "loss": 0.4556, "step": 1636 }, { "epoch": 0.6894566895970798, "grad_norm": 0.5100683569908142, "learning_rate": 9.49584284341964e-06, "loss": 0.4616, "step": 1637 }, { "epoch": 0.6898778604520567, "grad_norm": 0.4770912826061249, "learning_rate": 9.494769773627042e-06, "loss": 0.4787, "step": 1638 }, { "epoch": 0.6902990313070335, "grad_norm": 0.42367297410964966, "learning_rate": 9.493695623827257e-06, "loss": 0.482, "step": 1639 }, { "epoch": 0.6907202021620104, "grad_norm": 0.44561997056007385, "learning_rate": 9.492620394278383e-06, "loss": 0.4657, "step": 1640 }, { "epoch": 0.6911413730169872, "grad_norm": 0.5336233973503113, "learning_rate": 9.491544085238778e-06, "loss": 0.4758, "step": 1641 }, { "epoch": 0.691562543871964, "grad_norm": 0.5015300512313843, "learning_rate": 9.490466696967058e-06, "loss": 0.5269, "step": 1642 }, { "epoch": 0.6919837147269409, "grad_norm": 0.49998658895492554, "learning_rate": 9.489388229722098e-06, "loss": 0.4899, "step": 1643 }, { "epoch": 0.6924048855819177, "grad_norm": 0.520885705947876, "learning_rate": 9.488308683763036e-06, "loss": 0.466, "step": 1644 }, { "epoch": 0.6928260564368945, "grad_norm": 0.48164668679237366, "learning_rate": 9.487228059349262e-06, "loss": 0.4445, "step": 1645 }, { "epoch": 0.6932472272918714, "grad_norm": 0.4363544285297394, "learning_rate": 9.486146356740433e-06, "loss": 0.4625, "step": 1646 }, { "epoch": 0.6936683981468482, "grad_norm": 0.5017496943473816, "learning_rate": 9.48506357619646e-06, "loss": 0.5129, "step": 1647 }, { "epoch": 0.6940895690018251, "grad_norm": 0.4792582392692566, "learning_rate": 9.483979717977513e-06, "loss": 0.4997, "step": 1648 }, { "epoch": 0.6945107398568019, "grad_norm": 0.5386309623718262, "learning_rate": 9.482894782344025e-06, "loss": 0.5159, "step": 1649 }, { "epoch": 0.6949319107117787, "grad_norm": 0.47766420245170593, "learning_rate": 9.481808769556685e-06, "loss": 0.4744, "step": 1650 }, { "epoch": 0.6953530815667556, "grad_norm": 0.4622989594936371, "learning_rate": 9.48072167987644e-06, "loss": 0.4727, "step": 1651 }, { "epoch": 0.6957742524217324, "grad_norm": 0.5083233118057251, "learning_rate": 9.479633513564494e-06, "loss": 0.49, "step": 1652 }, { "epoch": 0.6961954232767092, "grad_norm": 0.4747864902019501, "learning_rate": 9.478544270882318e-06, "loss": 0.4668, "step": 1653 }, { "epoch": 0.6966165941316861, "grad_norm": 0.4772510826587677, "learning_rate": 9.477453952091635e-06, "loss": 0.4764, "step": 1654 }, { "epoch": 0.6970377649866629, "grad_norm": 0.4789689779281616, "learning_rate": 9.476362557454423e-06, "loss": 0.4885, "step": 1655 }, { "epoch": 0.6974589358416398, "grad_norm": 0.4983753561973572, "learning_rate": 9.47527008723293e-06, "loss": 0.5024, "step": 1656 }, { "epoch": 0.6978801066966166, "grad_norm": 0.534274160861969, "learning_rate": 9.474176541689651e-06, "loss": 0.4981, "step": 1657 }, { "epoch": 0.6983012775515934, "grad_norm": 0.44744551181793213, "learning_rate": 9.473081921087346e-06, "loss": 0.4974, "step": 1658 }, { "epoch": 0.6987224484065703, "grad_norm": 0.4940304160118103, "learning_rate": 9.471986225689032e-06, "loss": 0.464, "step": 1659 }, { "epoch": 0.6991436192615471, "grad_norm": 0.46611911058425903, "learning_rate": 9.47088945575798e-06, "loss": 0.4649, "step": 1660 }, { "epoch": 0.699564790116524, "grad_norm": 0.5478931069374084, "learning_rate": 9.469791611557728e-06, "loss": 0.5333, "step": 1661 }, { "epoch": 0.6999859609715008, "grad_norm": 0.4513242244720459, "learning_rate": 9.468692693352063e-06, "loss": 0.4991, "step": 1662 }, { "epoch": 0.7004071318264776, "grad_norm": 0.4953286945819855, "learning_rate": 9.467592701405035e-06, "loss": 0.5136, "step": 1663 }, { "epoch": 0.7008283026814545, "grad_norm": 0.4951252043247223, "learning_rate": 9.466491635980952e-06, "loss": 0.4829, "step": 1664 }, { "epoch": 0.7012494735364313, "grad_norm": 0.5258673429489136, "learning_rate": 9.46538949734438e-06, "loss": 0.4883, "step": 1665 }, { "epoch": 0.7016706443914081, "grad_norm": 0.47120311856269836, "learning_rate": 9.464286285760138e-06, "loss": 0.4837, "step": 1666 }, { "epoch": 0.702091815246385, "grad_norm": 0.5564139485359192, "learning_rate": 9.463182001493309e-06, "loss": 0.4755, "step": 1667 }, { "epoch": 0.7025129861013618, "grad_norm": 0.5117712020874023, "learning_rate": 9.462076644809233e-06, "loss": 0.5154, "step": 1668 }, { "epoch": 0.7029341569563387, "grad_norm": 0.4885038137435913, "learning_rate": 9.460970215973502e-06, "loss": 0.4695, "step": 1669 }, { "epoch": 0.7033553278113155, "grad_norm": 0.5110191702842712, "learning_rate": 9.459862715251973e-06, "loss": 0.4738, "step": 1670 }, { "epoch": 0.7037764986662923, "grad_norm": 0.4595445394515991, "learning_rate": 9.458754142910756e-06, "loss": 0.4532, "step": 1671 }, { "epoch": 0.7041976695212692, "grad_norm": 0.4265916049480438, "learning_rate": 9.457644499216221e-06, "loss": 0.4538, "step": 1672 }, { "epoch": 0.704618840376246, "grad_norm": 0.4791349172592163, "learning_rate": 9.456533784434992e-06, "loss": 0.4808, "step": 1673 }, { "epoch": 0.7050400112312228, "grad_norm": 0.4613744914531708, "learning_rate": 9.455421998833954e-06, "loss": 0.4809, "step": 1674 }, { "epoch": 0.7054611820861997, "grad_norm": 0.5199161171913147, "learning_rate": 9.454309142680247e-06, "loss": 0.4724, "step": 1675 }, { "epoch": 0.7058823529411765, "grad_norm": 0.46802905201911926, "learning_rate": 9.453195216241268e-06, "loss": 0.5044, "step": 1676 }, { "epoch": 0.7063035237961534, "grad_norm": 0.456076055765152, "learning_rate": 9.452080219784676e-06, "loss": 0.478, "step": 1677 }, { "epoch": 0.7067246946511302, "grad_norm": 0.5053601264953613, "learning_rate": 9.450964153578381e-06, "loss": 0.495, "step": 1678 }, { "epoch": 0.707145865506107, "grad_norm": 0.5062270760536194, "learning_rate": 9.449847017890553e-06, "loss": 0.5009, "step": 1679 }, { "epoch": 0.7075670363610839, "grad_norm": 0.49002063274383545, "learning_rate": 9.448728812989615e-06, "loss": 0.4801, "step": 1680 }, { "epoch": 0.7079882072160606, "grad_norm": 0.47189560532569885, "learning_rate": 9.447609539144254e-06, "loss": 0.4702, "step": 1681 }, { "epoch": 0.7084093780710374, "grad_norm": 0.5032925605773926, "learning_rate": 9.44648919662341e-06, "loss": 0.4665, "step": 1682 }, { "epoch": 0.7088305489260143, "grad_norm": 0.5179406404495239, "learning_rate": 9.445367785696279e-06, "loss": 0.4959, "step": 1683 }, { "epoch": 0.7092517197809911, "grad_norm": 0.46953943371772766, "learning_rate": 9.444245306632312e-06, "loss": 0.4903, "step": 1684 }, { "epoch": 0.709672890635968, "grad_norm": 0.49530449509620667, "learning_rate": 9.443121759701223e-06, "loss": 0.4852, "step": 1685 }, { "epoch": 0.7100940614909448, "grad_norm": 0.5850326418876648, "learning_rate": 9.441997145172979e-06, "loss": 0.4724, "step": 1686 }, { "epoch": 0.7105152323459216, "grad_norm": 0.43924984335899353, "learning_rate": 9.4408714633178e-06, "loss": 0.4648, "step": 1687 }, { "epoch": 0.7109364032008985, "grad_norm": 0.5159868001937866, "learning_rate": 9.439744714406167e-06, "loss": 0.4823, "step": 1688 }, { "epoch": 0.7113575740558753, "grad_norm": 0.5051754117012024, "learning_rate": 9.438616898708818e-06, "loss": 0.4416, "step": 1689 }, { "epoch": 0.7117787449108521, "grad_norm": 0.4755825102329254, "learning_rate": 9.437488016496747e-06, "loss": 0.4728, "step": 1690 }, { "epoch": 0.712199915765829, "grad_norm": 0.5056664943695068, "learning_rate": 9.436358068041198e-06, "loss": 0.4903, "step": 1691 }, { "epoch": 0.7126210866208058, "grad_norm": 0.5353665351867676, "learning_rate": 9.435227053613678e-06, "loss": 0.4999, "step": 1692 }, { "epoch": 0.7130422574757826, "grad_norm": 0.5257391929626465, "learning_rate": 9.434094973485947e-06, "loss": 0.4979, "step": 1693 }, { "epoch": 0.7134634283307595, "grad_norm": 0.5250925421714783, "learning_rate": 9.432961827930028e-06, "loss": 0.5091, "step": 1694 }, { "epoch": 0.7138845991857363, "grad_norm": 0.5157696604728699, "learning_rate": 9.431827617218186e-06, "loss": 0.501, "step": 1695 }, { "epoch": 0.7143057700407132, "grad_norm": 0.4926456809043884, "learning_rate": 9.430692341622954e-06, "loss": 0.4869, "step": 1696 }, { "epoch": 0.71472694089569, "grad_norm": 0.5118269324302673, "learning_rate": 9.42955600141712e-06, "loss": 0.4829, "step": 1697 }, { "epoch": 0.7151481117506668, "grad_norm": 0.466407835483551, "learning_rate": 9.428418596873718e-06, "loss": 0.4792, "step": 1698 }, { "epoch": 0.7155692826056437, "grad_norm": 0.4399794042110443, "learning_rate": 9.427280128266049e-06, "loss": 0.4839, "step": 1699 }, { "epoch": 0.7159904534606205, "grad_norm": 0.4923059046268463, "learning_rate": 9.426140595867667e-06, "loss": 0.5049, "step": 1700 }, { "epoch": 0.7164116243155974, "grad_norm": 0.48168015480041504, "learning_rate": 9.424999999952375e-06, "loss": 0.4917, "step": 1701 }, { "epoch": 0.7168327951705742, "grad_norm": 0.46348240971565247, "learning_rate": 9.423858340794241e-06, "loss": 0.4827, "step": 1702 }, { "epoch": 0.717253966025551, "grad_norm": 0.4524596929550171, "learning_rate": 9.422715618667579e-06, "loss": 0.4848, "step": 1703 }, { "epoch": 0.7176751368805279, "grad_norm": 0.49993401765823364, "learning_rate": 9.421571833846967e-06, "loss": 0.4992, "step": 1704 }, { "epoch": 0.7180963077355047, "grad_norm": 0.47533079981803894, "learning_rate": 9.420426986607234e-06, "loss": 0.4919, "step": 1705 }, { "epoch": 0.7185174785904815, "grad_norm": 0.5366756916046143, "learning_rate": 9.419281077223463e-06, "loss": 0.5161, "step": 1706 }, { "epoch": 0.7189386494454584, "grad_norm": 0.5327591300010681, "learning_rate": 9.418134105970996e-06, "loss": 0.5022, "step": 1707 }, { "epoch": 0.7193598203004352, "grad_norm": 0.47102171182632446, "learning_rate": 9.41698607312543e-06, "loss": 0.4565, "step": 1708 }, { "epoch": 0.719780991155412, "grad_norm": 0.535873532295227, "learning_rate": 9.415836978962609e-06, "loss": 0.4918, "step": 1709 }, { "epoch": 0.7202021620103889, "grad_norm": 0.48156967759132385, "learning_rate": 9.414686823758645e-06, "loss": 0.4601, "step": 1710 }, { "epoch": 0.7206233328653657, "grad_norm": 0.5102245211601257, "learning_rate": 9.413535607789894e-06, "loss": 0.4894, "step": 1711 }, { "epoch": 0.7210445037203426, "grad_norm": 0.5447707772254944, "learning_rate": 9.412383331332975e-06, "loss": 0.5074, "step": 1712 }, { "epoch": 0.7214656745753194, "grad_norm": 0.4694513976573944, "learning_rate": 9.411229994664754e-06, "loss": 0.46, "step": 1713 }, { "epoch": 0.7218868454302962, "grad_norm": 0.5556673407554626, "learning_rate": 9.410075598062357e-06, "loss": 0.5089, "step": 1714 }, { "epoch": 0.7223080162852731, "grad_norm": 0.4782800078392029, "learning_rate": 9.408920141803167e-06, "loss": 0.4719, "step": 1715 }, { "epoch": 0.7227291871402499, "grad_norm": 0.5088542699813843, "learning_rate": 9.407763626164812e-06, "loss": 0.4861, "step": 1716 }, { "epoch": 0.7231503579952268, "grad_norm": 0.5041608810424805, "learning_rate": 9.406606051425185e-06, "loss": 0.487, "step": 1717 }, { "epoch": 0.7235715288502036, "grad_norm": 0.4769231975078583, "learning_rate": 9.405447417862427e-06, "loss": 0.4182, "step": 1718 }, { "epoch": 0.7239926997051804, "grad_norm": 0.5312278270721436, "learning_rate": 9.404287725754938e-06, "loss": 0.4976, "step": 1719 }, { "epoch": 0.7244138705601573, "grad_norm": 0.5179927349090576, "learning_rate": 9.403126975381366e-06, "loss": 0.5483, "step": 1720 }, { "epoch": 0.7248350414151341, "grad_norm": 0.5032543540000916, "learning_rate": 9.40196516702062e-06, "loss": 0.5106, "step": 1721 }, { "epoch": 0.725256212270111, "grad_norm": 0.46921253204345703, "learning_rate": 9.400802300951858e-06, "loss": 0.5017, "step": 1722 }, { "epoch": 0.7256773831250878, "grad_norm": 0.4430667757987976, "learning_rate": 9.399638377454496e-06, "loss": 0.4758, "step": 1723 }, { "epoch": 0.7260985539800646, "grad_norm": 0.5271345973014832, "learning_rate": 9.398473396808203e-06, "loss": 0.5019, "step": 1724 }, { "epoch": 0.7265197248350415, "grad_norm": 0.4804336130619049, "learning_rate": 9.397307359292902e-06, "loss": 0.4937, "step": 1725 }, { "epoch": 0.7269408956900183, "grad_norm": 0.4400876760482788, "learning_rate": 9.396140265188768e-06, "loss": 0.4487, "step": 1726 }, { "epoch": 0.7273620665449951, "grad_norm": 0.4317844808101654, "learning_rate": 9.394972114776231e-06, "loss": 0.4531, "step": 1727 }, { "epoch": 0.727783237399972, "grad_norm": 0.5189148783683777, "learning_rate": 9.393802908335978e-06, "loss": 0.503, "step": 1728 }, { "epoch": 0.7282044082549488, "grad_norm": 0.4662054181098938, "learning_rate": 9.392632646148943e-06, "loss": 0.4972, "step": 1729 }, { "epoch": 0.7286255791099256, "grad_norm": 0.5224161148071289, "learning_rate": 9.391461328496319e-06, "loss": 0.529, "step": 1730 }, { "epoch": 0.7290467499649024, "grad_norm": 0.45670247077941895, "learning_rate": 9.390288955659554e-06, "loss": 0.4892, "step": 1731 }, { "epoch": 0.7294679208198792, "grad_norm": 0.47732141613960266, "learning_rate": 9.389115527920344e-06, "loss": 0.4957, "step": 1732 }, { "epoch": 0.729889091674856, "grad_norm": 0.4807876944541931, "learning_rate": 9.387941045560641e-06, "loss": 0.509, "step": 1733 }, { "epoch": 0.7303102625298329, "grad_norm": 0.48666682839393616, "learning_rate": 9.386765508862652e-06, "loss": 0.4907, "step": 1734 }, { "epoch": 0.7307314333848097, "grad_norm": 0.5558112859725952, "learning_rate": 9.385588918108836e-06, "loss": 0.5256, "step": 1735 }, { "epoch": 0.7311526042397866, "grad_norm": 0.5505550503730774, "learning_rate": 9.384411273581904e-06, "loss": 0.5271, "step": 1736 }, { "epoch": 0.7315737750947634, "grad_norm": 0.48267123103141785, "learning_rate": 9.383232575564823e-06, "loss": 0.4738, "step": 1737 }, { "epoch": 0.7319949459497402, "grad_norm": 0.44836175441741943, "learning_rate": 9.382052824340807e-06, "loss": 0.4824, "step": 1738 }, { "epoch": 0.7324161168047171, "grad_norm": 0.5017287731170654, "learning_rate": 9.380872020193337e-06, "loss": 0.4955, "step": 1739 }, { "epoch": 0.7328372876596939, "grad_norm": 0.508121907711029, "learning_rate": 9.379690163406129e-06, "loss": 0.493, "step": 1740 }, { "epoch": 0.7332584585146708, "grad_norm": 0.49494779109954834, "learning_rate": 9.378507254263164e-06, "loss": 0.4692, "step": 1741 }, { "epoch": 0.7336796293696476, "grad_norm": 0.48896098136901855, "learning_rate": 9.377323293048672e-06, "loss": 0.4752, "step": 1742 }, { "epoch": 0.7341008002246244, "grad_norm": 0.48930275440216064, "learning_rate": 9.376138280047136e-06, "loss": 0.4849, "step": 1743 }, { "epoch": 0.7345219710796013, "grad_norm": 0.48123878240585327, "learning_rate": 9.374952215543293e-06, "loss": 0.4713, "step": 1744 }, { "epoch": 0.7349431419345781, "grad_norm": 0.49505770206451416, "learning_rate": 9.37376509982213e-06, "loss": 0.4662, "step": 1745 }, { "epoch": 0.7353643127895549, "grad_norm": 0.46169304847717285, "learning_rate": 9.37257693316889e-06, "loss": 0.5022, "step": 1746 }, { "epoch": 0.7357854836445318, "grad_norm": 0.46422600746154785, "learning_rate": 9.371387715869066e-06, "loss": 0.4665, "step": 1747 }, { "epoch": 0.7362066544995086, "grad_norm": 0.4900830090045929, "learning_rate": 9.370197448208403e-06, "loss": 0.4802, "step": 1748 }, { "epoch": 0.7366278253544855, "grad_norm": 0.4494643807411194, "learning_rate": 9.3690061304729e-06, "loss": 0.4963, "step": 1749 }, { "epoch": 0.7370489962094623, "grad_norm": 0.5285513997077942, "learning_rate": 9.367813762948809e-06, "loss": 0.4729, "step": 1750 }, { "epoch": 0.7374701670644391, "grad_norm": 0.4363535940647125, "learning_rate": 9.366620345922632e-06, "loss": 0.4759, "step": 1751 }, { "epoch": 0.737891337919416, "grad_norm": 0.5493115186691284, "learning_rate": 9.365425879681126e-06, "loss": 0.5254, "step": 1752 }, { "epoch": 0.7383125087743928, "grad_norm": 0.5460651516914368, "learning_rate": 9.364230364511296e-06, "loss": 0.5191, "step": 1753 }, { "epoch": 0.7387336796293696, "grad_norm": 0.4443608522415161, "learning_rate": 9.363033800700403e-06, "loss": 0.4671, "step": 1754 }, { "epoch": 0.7391548504843465, "grad_norm": 0.5067112445831299, "learning_rate": 9.361836188535957e-06, "loss": 0.4872, "step": 1755 }, { "epoch": 0.7395760213393233, "grad_norm": 0.45451122522354126, "learning_rate": 9.360637528305723e-06, "loss": 0.4705, "step": 1756 }, { "epoch": 0.7399971921943002, "grad_norm": 0.5022176504135132, "learning_rate": 9.359437820297716e-06, "loss": 0.4916, "step": 1757 }, { "epoch": 0.740418363049277, "grad_norm": 0.44317710399627686, "learning_rate": 9.358237064800202e-06, "loss": 0.4633, "step": 1758 }, { "epoch": 0.7408395339042538, "grad_norm": 0.4861590564250946, "learning_rate": 9.357035262101699e-06, "loss": 0.4938, "step": 1759 }, { "epoch": 0.7412607047592307, "grad_norm": 0.43544721603393555, "learning_rate": 9.355832412490976e-06, "loss": 0.4794, "step": 1760 }, { "epoch": 0.7416818756142075, "grad_norm": 0.5336884260177612, "learning_rate": 9.35462851625706e-06, "loss": 0.4917, "step": 1761 }, { "epoch": 0.7421030464691843, "grad_norm": 0.47101667523384094, "learning_rate": 9.35342357368922e-06, "loss": 0.494, "step": 1762 }, { "epoch": 0.7425242173241612, "grad_norm": 0.5482058525085449, "learning_rate": 9.352217585076981e-06, "loss": 0.4918, "step": 1763 }, { "epoch": 0.742945388179138, "grad_norm": 0.5469294190406799, "learning_rate": 9.35101055071012e-06, "loss": 0.5033, "step": 1764 }, { "epoch": 0.7433665590341149, "grad_norm": 0.4924775958061218, "learning_rate": 9.349802470878665e-06, "loss": 0.5156, "step": 1765 }, { "epoch": 0.7437877298890917, "grad_norm": 0.515329122543335, "learning_rate": 9.348593345872892e-06, "loss": 0.4899, "step": 1766 }, { "epoch": 0.7442089007440685, "grad_norm": 0.4527454972267151, "learning_rate": 9.347383175983333e-06, "loss": 0.4994, "step": 1767 }, { "epoch": 0.7446300715990454, "grad_norm": 0.429932564496994, "learning_rate": 9.346171961500766e-06, "loss": 0.4653, "step": 1768 }, { "epoch": 0.7450512424540222, "grad_norm": 0.5019491910934448, "learning_rate": 9.344959702716225e-06, "loss": 0.4901, "step": 1769 }, { "epoch": 0.745472413308999, "grad_norm": 0.4842943549156189, "learning_rate": 9.343746399920993e-06, "loss": 0.466, "step": 1770 }, { "epoch": 0.7458935841639759, "grad_norm": 0.5010812878608704, "learning_rate": 9.342532053406601e-06, "loss": 0.4764, "step": 1771 }, { "epoch": 0.7463147550189527, "grad_norm": 0.4486219882965088, "learning_rate": 9.341316663464835e-06, "loss": 0.4652, "step": 1772 }, { "epoch": 0.7467359258739296, "grad_norm": 0.4505569338798523, "learning_rate": 9.34010023038773e-06, "loss": 0.4673, "step": 1773 }, { "epoch": 0.7471570967289064, "grad_norm": 0.4783919155597687, "learning_rate": 9.33888275446757e-06, "loss": 0.4957, "step": 1774 }, { "epoch": 0.7475782675838832, "grad_norm": 0.5336446762084961, "learning_rate": 9.337664235996893e-06, "loss": 0.511, "step": 1775 }, { "epoch": 0.7479994384388601, "grad_norm": 0.508265495300293, "learning_rate": 9.336444675268485e-06, "loss": 0.497, "step": 1776 }, { "epoch": 0.7484206092938369, "grad_norm": 0.5492038130760193, "learning_rate": 9.335224072575381e-06, "loss": 0.481, "step": 1777 }, { "epoch": 0.7488417801488138, "grad_norm": 0.46039724349975586, "learning_rate": 9.334002428210874e-06, "loss": 0.47, "step": 1778 }, { "epoch": 0.7492629510037906, "grad_norm": 0.48249679803848267, "learning_rate": 9.332779742468496e-06, "loss": 0.5234, "step": 1779 }, { "epoch": 0.7496841218587674, "grad_norm": 0.500920832157135, "learning_rate": 9.331556015642037e-06, "loss": 0.5096, "step": 1780 }, { "epoch": 0.7501052927137442, "grad_norm": 0.49142998456954956, "learning_rate": 9.330331248025537e-06, "loss": 0.5116, "step": 1781 }, { "epoch": 0.750526463568721, "grad_norm": 0.5027164220809937, "learning_rate": 9.329105439913284e-06, "loss": 0.4724, "step": 1782 }, { "epoch": 0.7509476344236978, "grad_norm": 0.4396772086620331, "learning_rate": 9.327878591599812e-06, "loss": 0.448, "step": 1783 }, { "epoch": 0.7513688052786747, "grad_norm": 0.5308758020401001, "learning_rate": 9.326650703379913e-06, "loss": 0.47, "step": 1784 }, { "epoch": 0.7517899761336515, "grad_norm": 0.4902430772781372, "learning_rate": 9.325421775548625e-06, "loss": 0.5029, "step": 1785 }, { "epoch": 0.7522111469886283, "grad_norm": 0.5037869215011597, "learning_rate": 9.324191808401235e-06, "loss": 0.4889, "step": 1786 }, { "epoch": 0.7526323178436052, "grad_norm": 0.4408358335494995, "learning_rate": 9.322960802233279e-06, "loss": 0.4428, "step": 1787 }, { "epoch": 0.753053488698582, "grad_norm": 0.4556630849838257, "learning_rate": 9.321728757340546e-06, "loss": 0.4756, "step": 1788 }, { "epoch": 0.7534746595535589, "grad_norm": 0.47274336218833923, "learning_rate": 9.320495674019074e-06, "loss": 0.4678, "step": 1789 }, { "epoch": 0.7538958304085357, "grad_norm": 0.4772375524044037, "learning_rate": 9.319261552565147e-06, "loss": 0.4997, "step": 1790 }, { "epoch": 0.7543170012635125, "grad_norm": 0.47642138600349426, "learning_rate": 9.318026393275304e-06, "loss": 0.4616, "step": 1791 }, { "epoch": 0.7547381721184894, "grad_norm": 0.4783822000026703, "learning_rate": 9.316790196446325e-06, "loss": 0.4834, "step": 1792 }, { "epoch": 0.7551593429734662, "grad_norm": 0.49430012702941895, "learning_rate": 9.315552962375248e-06, "loss": 0.4677, "step": 1793 }, { "epoch": 0.755580513828443, "grad_norm": 0.45183971524238586, "learning_rate": 9.314314691359357e-06, "loss": 0.4864, "step": 1794 }, { "epoch": 0.7560016846834199, "grad_norm": 0.44278737902641296, "learning_rate": 9.313075383696181e-06, "loss": 0.4619, "step": 1795 }, { "epoch": 0.7564228555383967, "grad_norm": 0.4849758744239807, "learning_rate": 9.311835039683507e-06, "loss": 0.4645, "step": 1796 }, { "epoch": 0.7568440263933736, "grad_norm": 0.4970662295818329, "learning_rate": 9.310593659619365e-06, "loss": 0.4575, "step": 1797 }, { "epoch": 0.7572651972483504, "grad_norm": 0.4774876832962036, "learning_rate": 9.309351243802032e-06, "loss": 0.4836, "step": 1798 }, { "epoch": 0.7576863681033272, "grad_norm": 0.45867499709129333, "learning_rate": 9.308107792530037e-06, "loss": 0.4803, "step": 1799 }, { "epoch": 0.7581075389583041, "grad_norm": 0.5135429501533508, "learning_rate": 9.30686330610216e-06, "loss": 0.4925, "step": 1800 }, { "epoch": 0.7585287098132809, "grad_norm": 0.4478355646133423, "learning_rate": 9.305617784817426e-06, "loss": 0.4649, "step": 1801 }, { "epoch": 0.7589498806682577, "grad_norm": 0.4624705910682678, "learning_rate": 9.304371228975112e-06, "loss": 0.4808, "step": 1802 }, { "epoch": 0.7593710515232346, "grad_norm": 0.47288477420806885, "learning_rate": 9.303123638874738e-06, "loss": 0.4498, "step": 1803 }, { "epoch": 0.7597922223782114, "grad_norm": 0.47795331478118896, "learning_rate": 9.301875014816078e-06, "loss": 0.4865, "step": 1804 }, { "epoch": 0.7602133932331883, "grad_norm": 0.4689038097858429, "learning_rate": 9.300625357099152e-06, "loss": 0.4814, "step": 1805 }, { "epoch": 0.7606345640881651, "grad_norm": 0.4361303448677063, "learning_rate": 9.299374666024227e-06, "loss": 0.4522, "step": 1806 }, { "epoch": 0.7610557349431419, "grad_norm": 0.43314868211746216, "learning_rate": 9.298122941891825e-06, "loss": 0.4431, "step": 1807 }, { "epoch": 0.7614769057981188, "grad_norm": 0.5099096298217773, "learning_rate": 9.296870185002708e-06, "loss": 0.4923, "step": 1808 }, { "epoch": 0.7618980766530956, "grad_norm": 0.46681317687034607, "learning_rate": 9.295616395657889e-06, "loss": 0.4495, "step": 1809 }, { "epoch": 0.7623192475080725, "grad_norm": 0.45335328578948975, "learning_rate": 9.29436157415863e-06, "loss": 0.4827, "step": 1810 }, { "epoch": 0.7627404183630493, "grad_norm": 0.5120238661766052, "learning_rate": 9.293105720806444e-06, "loss": 0.4929, "step": 1811 }, { "epoch": 0.7631615892180261, "grad_norm": 0.47739553451538086, "learning_rate": 9.291848835903083e-06, "loss": 0.5052, "step": 1812 }, { "epoch": 0.763582760073003, "grad_norm": 0.5347999930381775, "learning_rate": 9.290590919750554e-06, "loss": 0.4604, "step": 1813 }, { "epoch": 0.7640039309279798, "grad_norm": 0.4860198199748993, "learning_rate": 9.289331972651114e-06, "loss": 0.4594, "step": 1814 }, { "epoch": 0.7644251017829566, "grad_norm": 0.501204252243042, "learning_rate": 9.288071994907262e-06, "loss": 0.5419, "step": 1815 }, { "epoch": 0.7648462726379335, "grad_norm": 0.510328471660614, "learning_rate": 9.286810986821744e-06, "loss": 0.4992, "step": 1816 }, { "epoch": 0.7652674434929103, "grad_norm": 0.5485965609550476, "learning_rate": 9.285548948697557e-06, "loss": 0.4818, "step": 1817 }, { "epoch": 0.7656886143478872, "grad_norm": 0.5432015657424927, "learning_rate": 9.284285880837947e-06, "loss": 0.4956, "step": 1818 }, { "epoch": 0.766109785202864, "grad_norm": 0.4733757972717285, "learning_rate": 9.283021783546403e-06, "loss": 0.5025, "step": 1819 }, { "epoch": 0.7665309560578408, "grad_norm": 0.5424556136131287, "learning_rate": 9.281756657126664e-06, "loss": 0.5025, "step": 1820 }, { "epoch": 0.7669521269128177, "grad_norm": 0.5605005025863647, "learning_rate": 9.280490501882715e-06, "loss": 0.5155, "step": 1821 }, { "epoch": 0.7673732977677945, "grad_norm": 0.5215995907783508, "learning_rate": 9.27922331811879e-06, "loss": 0.4632, "step": 1822 }, { "epoch": 0.7677944686227713, "grad_norm": 0.49309659004211426, "learning_rate": 9.27795510613937e-06, "loss": 0.4987, "step": 1823 }, { "epoch": 0.7682156394777482, "grad_norm": 0.481054425239563, "learning_rate": 9.276685866249178e-06, "loss": 0.4859, "step": 1824 }, { "epoch": 0.768636810332725, "grad_norm": 0.5270519852638245, "learning_rate": 9.275415598753191e-06, "loss": 0.5094, "step": 1825 }, { "epoch": 0.7690579811877019, "grad_norm": 0.477282851934433, "learning_rate": 9.27414430395663e-06, "loss": 0.4703, "step": 1826 }, { "epoch": 0.7694791520426787, "grad_norm": 0.4842403829097748, "learning_rate": 9.272871982164963e-06, "loss": 0.4802, "step": 1827 }, { "epoch": 0.7699003228976555, "grad_norm": 0.471200168132782, "learning_rate": 9.271598633683904e-06, "loss": 0.5087, "step": 1828 }, { "epoch": 0.7703214937526324, "grad_norm": 0.45819616317749023, "learning_rate": 9.270324258819413e-06, "loss": 0.5102, "step": 1829 }, { "epoch": 0.7707426646076092, "grad_norm": 0.49950575828552246, "learning_rate": 9.2690488578777e-06, "loss": 0.525, "step": 1830 }, { "epoch": 0.7711638354625859, "grad_norm": 0.4891490340232849, "learning_rate": 9.267772431165218e-06, "loss": 0.484, "step": 1831 }, { "epoch": 0.7715850063175628, "grad_norm": 0.45251163840293884, "learning_rate": 9.26649497898867e-06, "loss": 0.4641, "step": 1832 }, { "epoch": 0.7720061771725396, "grad_norm": 0.422137051820755, "learning_rate": 9.265216501655002e-06, "loss": 0.4654, "step": 1833 }, { "epoch": 0.7724273480275164, "grad_norm": 0.4974527359008789, "learning_rate": 9.263936999471406e-06, "loss": 0.494, "step": 1834 }, { "epoch": 0.7728485188824933, "grad_norm": 0.5475187301635742, "learning_rate": 9.262656472745324e-06, "loss": 0.4935, "step": 1835 }, { "epoch": 0.7732696897374701, "grad_norm": 0.4910784661769867, "learning_rate": 9.261374921784441e-06, "loss": 0.4723, "step": 1836 }, { "epoch": 0.773690860592447, "grad_norm": 0.46391841769218445, "learning_rate": 9.260092346896692e-06, "loss": 0.502, "step": 1837 }, { "epoch": 0.7741120314474238, "grad_norm": 0.5477834939956665, "learning_rate": 9.258808748390249e-06, "loss": 0.4736, "step": 1838 }, { "epoch": 0.7745332023024006, "grad_norm": 0.4640568792819977, "learning_rate": 9.257524126573542e-06, "loss": 0.4484, "step": 1839 }, { "epoch": 0.7749543731573775, "grad_norm": 0.4593585431575775, "learning_rate": 9.25623848175524e-06, "loss": 0.4578, "step": 1840 }, { "epoch": 0.7753755440123543, "grad_norm": 0.47404396533966064, "learning_rate": 9.254951814244257e-06, "loss": 0.4753, "step": 1841 }, { "epoch": 0.7757967148673311, "grad_norm": 0.4426635503768921, "learning_rate": 9.253664124349755e-06, "loss": 0.4757, "step": 1842 }, { "epoch": 0.776217885722308, "grad_norm": 0.4774438440799713, "learning_rate": 9.252375412381143e-06, "loss": 0.4541, "step": 1843 }, { "epoch": 0.7766390565772848, "grad_norm": 0.4235869348049164, "learning_rate": 9.251085678648072e-06, "loss": 0.4757, "step": 1844 }, { "epoch": 0.7770602274322617, "grad_norm": 0.49839702248573303, "learning_rate": 9.24979492346044e-06, "loss": 0.5131, "step": 1845 }, { "epoch": 0.7774813982872385, "grad_norm": 0.4851263165473938, "learning_rate": 9.248503147128394e-06, "loss": 0.5052, "step": 1846 }, { "epoch": 0.7779025691422153, "grad_norm": 0.4357233941555023, "learning_rate": 9.247210349962319e-06, "loss": 0.423, "step": 1847 }, { "epoch": 0.7783237399971922, "grad_norm": 0.4637250602245331, "learning_rate": 9.245916532272852e-06, "loss": 0.5001, "step": 1848 }, { "epoch": 0.778744910852169, "grad_norm": 0.4513920545578003, "learning_rate": 9.24462169437087e-06, "loss": 0.4685, "step": 1849 }, { "epoch": 0.7791660817071459, "grad_norm": 0.43146073818206787, "learning_rate": 9.243325836567503e-06, "loss": 0.4627, "step": 1850 }, { "epoch": 0.7795872525621227, "grad_norm": 0.5304481983184814, "learning_rate": 9.242028959174115e-06, "loss": 0.5178, "step": 1851 }, { "epoch": 0.7800084234170995, "grad_norm": 0.42440304160118103, "learning_rate": 9.240731062502323e-06, "loss": 0.4525, "step": 1852 }, { "epoch": 0.7804295942720764, "grad_norm": 0.48942703008651733, "learning_rate": 9.23943214686399e-06, "loss": 0.5021, "step": 1853 }, { "epoch": 0.7808507651270532, "grad_norm": 0.39065566658973694, "learning_rate": 9.238132212571215e-06, "loss": 0.4386, "step": 1854 }, { "epoch": 0.78127193598203, "grad_norm": 0.4541257619857788, "learning_rate": 9.236831259936352e-06, "loss": 0.5014, "step": 1855 }, { "epoch": 0.7816931068370069, "grad_norm": 0.4527304172515869, "learning_rate": 9.235529289271991e-06, "loss": 0.509, "step": 1856 }, { "epoch": 0.7821142776919837, "grad_norm": 0.4596456289291382, "learning_rate": 9.234226300890973e-06, "loss": 0.5045, "step": 1857 }, { "epoch": 0.7825354485469606, "grad_norm": 0.4639550447463989, "learning_rate": 9.232922295106383e-06, "loss": 0.5085, "step": 1858 }, { "epoch": 0.7829566194019374, "grad_norm": 0.4623013734817505, "learning_rate": 9.231617272231546e-06, "loss": 0.5002, "step": 1859 }, { "epoch": 0.7833777902569142, "grad_norm": 0.435542494058609, "learning_rate": 9.230311232580033e-06, "loss": 0.5166, "step": 1860 }, { "epoch": 0.7837989611118911, "grad_norm": 0.43823373317718506, "learning_rate": 9.229004176465662e-06, "loss": 0.4409, "step": 1861 }, { "epoch": 0.7842201319668679, "grad_norm": 0.47905850410461426, "learning_rate": 9.227696104202495e-06, "loss": 0.5228, "step": 1862 }, { "epoch": 0.7846413028218447, "grad_norm": 0.4379492998123169, "learning_rate": 9.226387016104834e-06, "loss": 0.485, "step": 1863 }, { "epoch": 0.7850624736768216, "grad_norm": 0.44444718956947327, "learning_rate": 9.22507691248723e-06, "loss": 0.4581, "step": 1864 }, { "epoch": 0.7854836445317984, "grad_norm": 0.45036765933036804, "learning_rate": 9.223765793664473e-06, "loss": 0.4666, "step": 1865 }, { "epoch": 0.7859048153867753, "grad_norm": 0.46158286929130554, "learning_rate": 9.222453659951602e-06, "loss": 0.499, "step": 1866 }, { "epoch": 0.7863259862417521, "grad_norm": 0.4396131932735443, "learning_rate": 9.221140511663899e-06, "loss": 0.4633, "step": 1867 }, { "epoch": 0.7867471570967289, "grad_norm": 0.4862862527370453, "learning_rate": 9.219826349116886e-06, "loss": 0.502, "step": 1868 }, { "epoch": 0.7871683279517058, "grad_norm": 0.47589391469955444, "learning_rate": 9.218511172626333e-06, "loss": 0.4691, "step": 1869 }, { "epoch": 0.7875894988066826, "grad_norm": 0.5003224015235901, "learning_rate": 9.217194982508248e-06, "loss": 0.5149, "step": 1870 }, { "epoch": 0.7880106696616594, "grad_norm": 0.47447511553764343, "learning_rate": 9.21587777907889e-06, "loss": 0.5036, "step": 1871 }, { "epoch": 0.7884318405166363, "grad_norm": 0.4657957851886749, "learning_rate": 9.214559562654755e-06, "loss": 0.4743, "step": 1872 }, { "epoch": 0.7888530113716131, "grad_norm": 0.4292421042919159, "learning_rate": 9.213240333552589e-06, "loss": 0.4418, "step": 1873 }, { "epoch": 0.78927418222659, "grad_norm": 0.4456825256347656, "learning_rate": 9.211920092089375e-06, "loss": 0.4798, "step": 1874 }, { "epoch": 0.7896953530815668, "grad_norm": 0.48580968379974365, "learning_rate": 9.210598838582343e-06, "loss": 0.4997, "step": 1875 }, { "epoch": 0.7901165239365436, "grad_norm": 0.42709141969680786, "learning_rate": 9.209276573348961e-06, "loss": 0.5, "step": 1876 }, { "epoch": 0.7905376947915205, "grad_norm": 0.44563326239585876, "learning_rate": 9.20795329670695e-06, "loss": 0.4946, "step": 1877 }, { "epoch": 0.7909588656464973, "grad_norm": 0.44591546058654785, "learning_rate": 9.206629008974263e-06, "loss": 0.4667, "step": 1878 }, { "epoch": 0.7913800365014741, "grad_norm": 0.4936206340789795, "learning_rate": 9.205303710469105e-06, "loss": 0.474, "step": 1879 }, { "epoch": 0.791801207356451, "grad_norm": 0.4917396306991577, "learning_rate": 9.203977401509916e-06, "loss": 0.4896, "step": 1880 }, { "epoch": 0.7922223782114278, "grad_norm": 0.4947665333747864, "learning_rate": 9.202650082415388e-06, "loss": 0.4552, "step": 1881 }, { "epoch": 0.7926435490664046, "grad_norm": 0.4721834361553192, "learning_rate": 9.201321753504444e-06, "loss": 0.4396, "step": 1882 }, { "epoch": 0.7930647199213814, "grad_norm": 0.5243302583694458, "learning_rate": 9.199992415096261e-06, "loss": 0.529, "step": 1883 }, { "epoch": 0.7934858907763582, "grad_norm": 0.538077712059021, "learning_rate": 9.19866206751025e-06, "loss": 0.4878, "step": 1884 }, { "epoch": 0.7939070616313351, "grad_norm": 0.4684341847896576, "learning_rate": 9.19733071106607e-06, "loss": 0.4792, "step": 1885 }, { "epoch": 0.7943282324863119, "grad_norm": 0.44398313760757446, "learning_rate": 9.195998346083621e-06, "loss": 0.4876, "step": 1886 }, { "epoch": 0.7947494033412887, "grad_norm": 0.5214699506759644, "learning_rate": 9.194664972883044e-06, "loss": 0.4693, "step": 1887 }, { "epoch": 0.7951705741962656, "grad_norm": 0.5131203532218933, "learning_rate": 9.193330591784723e-06, "loss": 0.5201, "step": 1888 }, { "epoch": 0.7955917450512424, "grad_norm": 0.5642892122268677, "learning_rate": 9.191995203109286e-06, "loss": 0.5277, "step": 1889 }, { "epoch": 0.7960129159062193, "grad_norm": 0.41127246618270874, "learning_rate": 9.190658807177598e-06, "loss": 0.4712, "step": 1890 }, { "epoch": 0.7964340867611961, "grad_norm": 0.4594988524913788, "learning_rate": 9.18932140431077e-06, "loss": 0.4689, "step": 1891 }, { "epoch": 0.7968552576161729, "grad_norm": 0.4788922965526581, "learning_rate": 9.187982994830157e-06, "loss": 0.4836, "step": 1892 }, { "epoch": 0.7972764284711498, "grad_norm": 0.5007444024085999, "learning_rate": 9.186643579057351e-06, "loss": 0.4603, "step": 1893 }, { "epoch": 0.7976975993261266, "grad_norm": 0.4143551290035248, "learning_rate": 9.18530315731419e-06, "loss": 0.481, "step": 1894 }, { "epoch": 0.7981187701811034, "grad_norm": 0.498913437128067, "learning_rate": 9.183961729922748e-06, "loss": 0.4592, "step": 1895 }, { "epoch": 0.7985399410360803, "grad_norm": 0.527388870716095, "learning_rate": 9.182619297205348e-06, "loss": 0.4714, "step": 1896 }, { "epoch": 0.7989611118910571, "grad_norm": 0.4831153154373169, "learning_rate": 9.181275859484551e-06, "loss": 0.4702, "step": 1897 }, { "epoch": 0.799382282746034, "grad_norm": 0.4559871256351471, "learning_rate": 9.179931417083155e-06, "loss": 0.4739, "step": 1898 }, { "epoch": 0.7998034536010108, "grad_norm": 0.4940463900566101, "learning_rate": 9.178585970324208e-06, "loss": 0.5188, "step": 1899 }, { "epoch": 0.8002246244559876, "grad_norm": 0.4845721423625946, "learning_rate": 9.177239519530995e-06, "loss": 0.5138, "step": 1900 }, { "epoch": 0.8006457953109645, "grad_norm": 0.4542626440525055, "learning_rate": 9.175892065027039e-06, "loss": 0.4705, "step": 1901 }, { "epoch": 0.8010669661659413, "grad_norm": 0.5098873972892761, "learning_rate": 9.174543607136111e-06, "loss": 0.4951, "step": 1902 }, { "epoch": 0.8014881370209181, "grad_norm": 0.5237310528755188, "learning_rate": 9.173194146182219e-06, "loss": 0.5026, "step": 1903 }, { "epoch": 0.801909307875895, "grad_norm": 0.4720877408981323, "learning_rate": 9.171843682489611e-06, "loss": 0.4977, "step": 1904 }, { "epoch": 0.8023304787308718, "grad_norm": 0.4840744435787201, "learning_rate": 9.170492216382779e-06, "loss": 0.4905, "step": 1905 }, { "epoch": 0.8027516495858487, "grad_norm": 0.4603358507156372, "learning_rate": 9.169139748186454e-06, "loss": 0.462, "step": 1906 }, { "epoch": 0.8031728204408255, "grad_norm": 0.4617783725261688, "learning_rate": 9.167786278225607e-06, "loss": 0.4659, "step": 1907 }, { "epoch": 0.8035939912958023, "grad_norm": 0.4832210838794708, "learning_rate": 9.166431806825456e-06, "loss": 0.5125, "step": 1908 }, { "epoch": 0.8040151621507792, "grad_norm": 0.47938621044158936, "learning_rate": 9.165076334311447e-06, "loss": 0.4613, "step": 1909 }, { "epoch": 0.804436333005756, "grad_norm": 0.49282675981521606, "learning_rate": 9.16371986100928e-06, "loss": 0.5066, "step": 1910 }, { "epoch": 0.8048575038607328, "grad_norm": 0.47622939944267273, "learning_rate": 9.162362387244886e-06, "loss": 0.4516, "step": 1911 }, { "epoch": 0.8052786747157097, "grad_norm": 0.5423588156700134, "learning_rate": 9.161003913344444e-06, "loss": 0.4766, "step": 1912 }, { "epoch": 0.8056998455706865, "grad_norm": 0.5001779794692993, "learning_rate": 9.159644439634364e-06, "loss": 0.4914, "step": 1913 }, { "epoch": 0.8061210164256634, "grad_norm": 0.5035297274589539, "learning_rate": 9.158283966441305e-06, "loss": 0.4789, "step": 1914 }, { "epoch": 0.8065421872806402, "grad_norm": 0.4819425642490387, "learning_rate": 9.156922494092162e-06, "loss": 0.4661, "step": 1915 }, { "epoch": 0.806963358135617, "grad_norm": 0.4888642132282257, "learning_rate": 9.155560022914072e-06, "loss": 0.4784, "step": 1916 }, { "epoch": 0.8073845289905939, "grad_norm": 0.45789918303489685, "learning_rate": 9.154196553234408e-06, "loss": 0.4944, "step": 1917 }, { "epoch": 0.8078056998455707, "grad_norm": 0.48045283555984497, "learning_rate": 9.152832085380788e-06, "loss": 0.4366, "step": 1918 }, { "epoch": 0.8082268707005476, "grad_norm": 0.5479316115379333, "learning_rate": 9.151466619681067e-06, "loss": 0.5234, "step": 1919 }, { "epoch": 0.8086480415555244, "grad_norm": 0.4829263985157013, "learning_rate": 9.150100156463337e-06, "loss": 0.5379, "step": 1920 }, { "epoch": 0.8090692124105012, "grad_norm": 0.4124409556388855, "learning_rate": 9.14873269605594e-06, "loss": 0.5043, "step": 1921 }, { "epoch": 0.8094903832654781, "grad_norm": 0.5198928713798523, "learning_rate": 9.147364238787444e-06, "loss": 0.4933, "step": 1922 }, { "epoch": 0.8099115541204549, "grad_norm": 0.4764072597026825, "learning_rate": 9.145994784986666e-06, "loss": 0.5019, "step": 1923 }, { "epoch": 0.8103327249754317, "grad_norm": 0.4936096966266632, "learning_rate": 9.14462433498266e-06, "loss": 0.5024, "step": 1924 }, { "epoch": 0.8107538958304086, "grad_norm": 0.4285197854042053, "learning_rate": 9.143252889104718e-06, "loss": 0.4911, "step": 1925 }, { "epoch": 0.8111750666853854, "grad_norm": 0.5275280475616455, "learning_rate": 9.141880447682373e-06, "loss": 0.4946, "step": 1926 }, { "epoch": 0.8115962375403623, "grad_norm": 0.42956990003585815, "learning_rate": 9.140507011045393e-06, "loss": 0.4572, "step": 1927 }, { "epoch": 0.8120174083953391, "grad_norm": 0.4408823251724243, "learning_rate": 9.139132579523792e-06, "loss": 0.4453, "step": 1928 }, { "epoch": 0.8124385792503159, "grad_norm": 0.46514075994491577, "learning_rate": 9.137757153447821e-06, "loss": 0.5003, "step": 1929 }, { "epoch": 0.8128597501052928, "grad_norm": 0.49265170097351074, "learning_rate": 9.136380733147965e-06, "loss": 0.4943, "step": 1930 }, { "epoch": 0.8132809209602696, "grad_norm": 0.4942847788333893, "learning_rate": 9.135003318954954e-06, "loss": 0.4907, "step": 1931 }, { "epoch": 0.8137020918152463, "grad_norm": 0.4266034960746765, "learning_rate": 9.133624911199751e-06, "loss": 0.4864, "step": 1932 }, { "epoch": 0.8141232626702232, "grad_norm": 0.440451055765152, "learning_rate": 9.132245510213564e-06, "loss": 0.4466, "step": 1933 }, { "epoch": 0.8145444335252, "grad_norm": 0.4673771858215332, "learning_rate": 9.130865116327838e-06, "loss": 0.4832, "step": 1934 }, { "epoch": 0.8149656043801768, "grad_norm": 0.45780128240585327, "learning_rate": 9.129483729874252e-06, "loss": 0.5131, "step": 1935 }, { "epoch": 0.8153867752351537, "grad_norm": 0.4354977011680603, "learning_rate": 9.128101351184726e-06, "loss": 0.5002, "step": 1936 }, { "epoch": 0.8158079460901305, "grad_norm": 0.41499564051628113, "learning_rate": 9.126717980591422e-06, "loss": 0.4649, "step": 1937 }, { "epoch": 0.8162291169451074, "grad_norm": 0.43786025047302246, "learning_rate": 9.125333618426736e-06, "loss": 0.4442, "step": 1938 }, { "epoch": 0.8166502878000842, "grad_norm": 0.4597048759460449, "learning_rate": 9.123948265023306e-06, "loss": 0.4522, "step": 1939 }, { "epoch": 0.817071458655061, "grad_norm": 0.4618520736694336, "learning_rate": 9.122561920714002e-06, "loss": 0.4802, "step": 1940 }, { "epoch": 0.8174926295100379, "grad_norm": 0.4856113791465759, "learning_rate": 9.12117458583194e-06, "loss": 0.466, "step": 1941 }, { "epoch": 0.8179138003650147, "grad_norm": 0.5444334745407104, "learning_rate": 9.119786260710467e-06, "loss": 0.4738, "step": 1942 }, { "epoch": 0.8183349712199915, "grad_norm": 0.463288277387619, "learning_rate": 9.118396945683173e-06, "loss": 0.493, "step": 1943 }, { "epoch": 0.8187561420749684, "grad_norm": 0.4412161111831665, "learning_rate": 9.117006641083882e-06, "loss": 0.4918, "step": 1944 }, { "epoch": 0.8191773129299452, "grad_norm": 0.5010554790496826, "learning_rate": 9.11561534724666e-06, "loss": 0.5182, "step": 1945 }, { "epoch": 0.8195984837849221, "grad_norm": 0.46610724925994873, "learning_rate": 9.114223064505805e-06, "loss": 0.4784, "step": 1946 }, { "epoch": 0.8200196546398989, "grad_norm": 0.4381648302078247, "learning_rate": 9.112829793195858e-06, "loss": 0.4818, "step": 1947 }, { "epoch": 0.8204408254948757, "grad_norm": 0.5074552893638611, "learning_rate": 9.111435533651595e-06, "loss": 0.5199, "step": 1948 }, { "epoch": 0.8208619963498526, "grad_norm": 0.4115135371685028, "learning_rate": 9.110040286208034e-06, "loss": 0.5063, "step": 1949 }, { "epoch": 0.8212831672048294, "grad_norm": 0.48281219601631165, "learning_rate": 9.108644051200417e-06, "loss": 0.4983, "step": 1950 }, { "epoch": 0.8217043380598062, "grad_norm": 0.4317837059497833, "learning_rate": 9.107246828964241e-06, "loss": 0.4619, "step": 1951 }, { "epoch": 0.8221255089147831, "grad_norm": 0.519356906414032, "learning_rate": 9.105848619835227e-06, "loss": 0.5019, "step": 1952 }, { "epoch": 0.8225466797697599, "grad_norm": 0.4734313189983368, "learning_rate": 9.104449424149342e-06, "loss": 0.4764, "step": 1953 }, { "epoch": 0.8229678506247368, "grad_norm": 0.5358612537384033, "learning_rate": 9.103049242242781e-06, "loss": 0.496, "step": 1954 }, { "epoch": 0.8233890214797136, "grad_norm": 0.4769311845302582, "learning_rate": 9.101648074451986e-06, "loss": 0.4868, "step": 1955 }, { "epoch": 0.8238101923346904, "grad_norm": 0.44033586978912354, "learning_rate": 9.100245921113627e-06, "loss": 0.4702, "step": 1956 }, { "epoch": 0.8242313631896673, "grad_norm": 0.4532148241996765, "learning_rate": 9.098842782564616e-06, "loss": 0.4902, "step": 1957 }, { "epoch": 0.8246525340446441, "grad_norm": 0.508616030216217, "learning_rate": 9.0974386591421e-06, "loss": 0.4538, "step": 1958 }, { "epoch": 0.825073704899621, "grad_norm": 0.47248369455337524, "learning_rate": 9.096033551183462e-06, "loss": 0.4865, "step": 1959 }, { "epoch": 0.8254948757545978, "grad_norm": 0.46062642335891724, "learning_rate": 9.094627459026326e-06, "loss": 0.4685, "step": 1960 }, { "epoch": 0.8259160466095746, "grad_norm": 0.47273755073547363, "learning_rate": 9.093220383008545e-06, "loss": 0.468, "step": 1961 }, { "epoch": 0.8263372174645515, "grad_norm": 0.4725767970085144, "learning_rate": 9.091812323468216e-06, "loss": 0.4851, "step": 1962 }, { "epoch": 0.8267583883195283, "grad_norm": 0.4358615577220917, "learning_rate": 9.090403280743666e-06, "loss": 0.4407, "step": 1963 }, { "epoch": 0.8271795591745051, "grad_norm": 0.4200538992881775, "learning_rate": 9.088993255173464e-06, "loss": 0.4313, "step": 1964 }, { "epoch": 0.827600730029482, "grad_norm": 0.45721060037612915, "learning_rate": 9.087582247096409e-06, "loss": 0.443, "step": 1965 }, { "epoch": 0.8280219008844588, "grad_norm": 0.47784820199012756, "learning_rate": 9.086170256851541e-06, "loss": 0.5078, "step": 1966 }, { "epoch": 0.8284430717394357, "grad_norm": 0.4942525029182434, "learning_rate": 9.084757284778132e-06, "loss": 0.4801, "step": 1967 }, { "epoch": 0.8288642425944125, "grad_norm": 0.5129073262214661, "learning_rate": 9.083343331215694e-06, "loss": 0.4634, "step": 1968 }, { "epoch": 0.8292854134493893, "grad_norm": 0.4617651402950287, "learning_rate": 9.081928396503975e-06, "loss": 0.4666, "step": 1969 }, { "epoch": 0.8297065843043662, "grad_norm": 0.5611156225204468, "learning_rate": 9.080512480982953e-06, "loss": 0.4774, "step": 1970 }, { "epoch": 0.830127755159343, "grad_norm": 0.4852682650089264, "learning_rate": 9.079095584992848e-06, "loss": 0.4732, "step": 1971 }, { "epoch": 0.8305489260143198, "grad_norm": 0.4605119824409485, "learning_rate": 9.077677708874113e-06, "loss": 0.4953, "step": 1972 }, { "epoch": 0.8309700968692967, "grad_norm": 0.4443456530570984, "learning_rate": 9.076258852967435e-06, "loss": 0.4539, "step": 1973 }, { "epoch": 0.8313912677242735, "grad_norm": 0.4286758601665497, "learning_rate": 9.074839017613737e-06, "loss": 0.4282, "step": 1974 }, { "epoch": 0.8318124385792504, "grad_norm": 0.4778732359409332, "learning_rate": 9.07341820315418e-06, "loss": 0.5002, "step": 1975 }, { "epoch": 0.8322336094342272, "grad_norm": 0.5119636654853821, "learning_rate": 9.07199640993016e-06, "loss": 0.5072, "step": 1976 }, { "epoch": 0.832654780289204, "grad_norm": 0.3700985014438629, "learning_rate": 9.070573638283302e-06, "loss": 0.4189, "step": 1977 }, { "epoch": 0.8330759511441809, "grad_norm": 0.45704683661460876, "learning_rate": 9.069149888555476e-06, "loss": 0.5089, "step": 1978 }, { "epoch": 0.8334971219991577, "grad_norm": 0.4292401969432831, "learning_rate": 9.06772516108878e-06, "loss": 0.5024, "step": 1979 }, { "epoch": 0.8339182928541345, "grad_norm": 0.42895370721817017, "learning_rate": 9.066299456225548e-06, "loss": 0.4578, "step": 1980 }, { "epoch": 0.8343394637091114, "grad_norm": 0.43499720096588135, "learning_rate": 9.064872774308349e-06, "loss": 0.4599, "step": 1981 }, { "epoch": 0.8347606345640881, "grad_norm": 0.5048273205757141, "learning_rate": 9.06344511567999e-06, "loss": 0.5386, "step": 1982 }, { "epoch": 0.835181805419065, "grad_norm": 0.4178802967071533, "learning_rate": 9.062016480683506e-06, "loss": 0.482, "step": 1983 }, { "epoch": 0.8356029762740418, "grad_norm": 0.4689487814903259, "learning_rate": 9.060586869662175e-06, "loss": 0.4804, "step": 1984 }, { "epoch": 0.8360241471290186, "grad_norm": 0.4560565948486328, "learning_rate": 9.059156282959504e-06, "loss": 0.4489, "step": 1985 }, { "epoch": 0.8364453179839955, "grad_norm": 0.4784211814403534, "learning_rate": 9.057724720919234e-06, "loss": 0.4597, "step": 1986 }, { "epoch": 0.8368664888389723, "grad_norm": 0.5447170734405518, "learning_rate": 9.056292183885342e-06, "loss": 0.4908, "step": 1987 }, { "epoch": 0.8372876596939491, "grad_norm": 0.4904424250125885, "learning_rate": 9.05485867220204e-06, "loss": 0.4722, "step": 1988 }, { "epoch": 0.837708830548926, "grad_norm": 0.471786767244339, "learning_rate": 9.053424186213776e-06, "loss": 0.5011, "step": 1989 }, { "epoch": 0.8381300014039028, "grad_norm": 0.535971999168396, "learning_rate": 9.051988726265224e-06, "loss": 0.4851, "step": 1990 }, { "epoch": 0.8385511722588797, "grad_norm": 0.5066270232200623, "learning_rate": 9.050552292701303e-06, "loss": 0.4701, "step": 1991 }, { "epoch": 0.8389723431138565, "grad_norm": 0.43761447072029114, "learning_rate": 9.049114885867156e-06, "loss": 0.4725, "step": 1992 }, { "epoch": 0.8393935139688333, "grad_norm": 0.541511595249176, "learning_rate": 9.047676506108167e-06, "loss": 0.48, "step": 1993 }, { "epoch": 0.8398146848238102, "grad_norm": 0.4538271725177765, "learning_rate": 9.046237153769952e-06, "loss": 0.471, "step": 1994 }, { "epoch": 0.840235855678787, "grad_norm": 0.459358811378479, "learning_rate": 9.044796829198356e-06, "loss": 0.5218, "step": 1995 }, { "epoch": 0.8406570265337638, "grad_norm": 0.5027135014533997, "learning_rate": 9.043355532739465e-06, "loss": 0.4925, "step": 1996 }, { "epoch": 0.8410781973887407, "grad_norm": 0.5200634002685547, "learning_rate": 9.041913264739597e-06, "loss": 0.4672, "step": 1997 }, { "epoch": 0.8414993682437175, "grad_norm": 0.516686737537384, "learning_rate": 9.040470025545294e-06, "loss": 0.4963, "step": 1998 }, { "epoch": 0.8419205390986944, "grad_norm": 0.5840184092521667, "learning_rate": 9.039025815503346e-06, "loss": 0.4966, "step": 1999 }, { "epoch": 0.8423417099536712, "grad_norm": 0.573348343372345, "learning_rate": 9.037580634960764e-06, "loss": 0.4809, "step": 2000 }, { "epoch": 0.842762880808648, "grad_norm": 0.42695775628089905, "learning_rate": 9.0361344842648e-06, "loss": 0.4533, "step": 2001 }, { "epoch": 0.8431840516636249, "grad_norm": 0.4760417640209198, "learning_rate": 9.034687363762935e-06, "loss": 0.4614, "step": 2002 }, { "epoch": 0.8436052225186017, "grad_norm": 0.4947229325771332, "learning_rate": 9.033239273802887e-06, "loss": 0.4919, "step": 2003 }, { "epoch": 0.8440263933735785, "grad_norm": 0.4787858724594116, "learning_rate": 9.031790214732601e-06, "loss": 0.4852, "step": 2004 }, { "epoch": 0.8444475642285554, "grad_norm": 0.5044967532157898, "learning_rate": 9.03034018690026e-06, "loss": 0.4914, "step": 2005 }, { "epoch": 0.8448687350835322, "grad_norm": 0.4302194118499756, "learning_rate": 9.028889190654278e-06, "loss": 0.4791, "step": 2006 }, { "epoch": 0.8452899059385091, "grad_norm": 0.49837639927864075, "learning_rate": 9.0274372263433e-06, "loss": 0.4691, "step": 2007 }, { "epoch": 0.8457110767934859, "grad_norm": 0.4533718228340149, "learning_rate": 9.025984294316206e-06, "loss": 0.4629, "step": 2008 }, { "epoch": 0.8461322476484627, "grad_norm": 0.5057711601257324, "learning_rate": 9.024530394922109e-06, "loss": 0.5018, "step": 2009 }, { "epoch": 0.8465534185034396, "grad_norm": 0.5171310305595398, "learning_rate": 9.023075528510353e-06, "loss": 0.5073, "step": 2010 }, { "epoch": 0.8469745893584164, "grad_norm": 0.4641430377960205, "learning_rate": 9.021619695430513e-06, "loss": 0.4583, "step": 2011 }, { "epoch": 0.8473957602133932, "grad_norm": 0.4518853425979614, "learning_rate": 9.020162896032397e-06, "loss": 0.4745, "step": 2012 }, { "epoch": 0.8478169310683701, "grad_norm": 0.47213971614837646, "learning_rate": 9.01870513066605e-06, "loss": 0.4765, "step": 2013 }, { "epoch": 0.8482381019233469, "grad_norm": 0.46457046270370483, "learning_rate": 9.017246399681742e-06, "loss": 0.5049, "step": 2014 }, { "epoch": 0.8486592727783238, "grad_norm": 0.44556260108947754, "learning_rate": 9.015786703429981e-06, "loss": 0.4427, "step": 2015 }, { "epoch": 0.8490804436333006, "grad_norm": 0.4394408166408539, "learning_rate": 9.014326042261498e-06, "loss": 0.4546, "step": 2016 }, { "epoch": 0.8495016144882774, "grad_norm": 0.4620637893676758, "learning_rate": 9.012864416527269e-06, "loss": 0.4742, "step": 2017 }, { "epoch": 0.8499227853432543, "grad_norm": 0.4519009292125702, "learning_rate": 9.011401826578492e-06, "loss": 0.4695, "step": 2018 }, { "epoch": 0.8503439561982311, "grad_norm": 0.47295549511909485, "learning_rate": 9.009938272766599e-06, "loss": 0.4424, "step": 2019 }, { "epoch": 0.850765127053208, "grad_norm": 0.43657752871513367, "learning_rate": 9.008473755443254e-06, "loss": 0.4741, "step": 2020 }, { "epoch": 0.8511862979081848, "grad_norm": 0.4333527386188507, "learning_rate": 9.007008274960351e-06, "loss": 0.4767, "step": 2021 }, { "epoch": 0.8516074687631616, "grad_norm": 0.42861422896385193, "learning_rate": 9.00554183167002e-06, "loss": 0.4577, "step": 2022 }, { "epoch": 0.8520286396181385, "grad_norm": 0.5385825037956238, "learning_rate": 9.004074425924618e-06, "loss": 0.4987, "step": 2023 }, { "epoch": 0.8524498104731153, "grad_norm": 0.43487054109573364, "learning_rate": 9.002606058076733e-06, "loss": 0.4509, "step": 2024 }, { "epoch": 0.8528709813280921, "grad_norm": 0.44511500000953674, "learning_rate": 9.00113672847919e-06, "loss": 0.4682, "step": 2025 }, { "epoch": 0.853292152183069, "grad_norm": 0.40977466106414795, "learning_rate": 8.999666437485034e-06, "loss": 0.458, "step": 2026 }, { "epoch": 0.8537133230380458, "grad_norm": 0.48852282762527466, "learning_rate": 8.998195185447553e-06, "loss": 0.4806, "step": 2027 }, { "epoch": 0.8541344938930227, "grad_norm": 0.4343629777431488, "learning_rate": 8.996722972720261e-06, "loss": 0.4917, "step": 2028 }, { "epoch": 0.8545556647479995, "grad_norm": 0.4515407979488373, "learning_rate": 8.9952497996569e-06, "loss": 0.4842, "step": 2029 }, { "epoch": 0.8549768356029763, "grad_norm": 0.5255452394485474, "learning_rate": 8.993775666611447e-06, "loss": 0.4993, "step": 2030 }, { "epoch": 0.8553980064579532, "grad_norm": 0.49419015645980835, "learning_rate": 8.992300573938104e-06, "loss": 0.4808, "step": 2031 }, { "epoch": 0.8558191773129299, "grad_norm": 0.5090222358703613, "learning_rate": 8.990824521991312e-06, "loss": 0.4955, "step": 2032 }, { "epoch": 0.8562403481679067, "grad_norm": 0.44439074397087097, "learning_rate": 8.98934751112574e-06, "loss": 0.4973, "step": 2033 }, { "epoch": 0.8566615190228836, "grad_norm": 0.43624746799468994, "learning_rate": 8.987869541696278e-06, "loss": 0.4535, "step": 2034 }, { "epoch": 0.8570826898778604, "grad_norm": 0.4849873185157776, "learning_rate": 8.98639061405806e-06, "loss": 0.5216, "step": 2035 }, { "epoch": 0.8575038607328372, "grad_norm": 0.48462268710136414, "learning_rate": 8.984910728566443e-06, "loss": 0.4948, "step": 2036 }, { "epoch": 0.8579250315878141, "grad_norm": 0.44123953580856323, "learning_rate": 8.983429885577013e-06, "loss": 0.4481, "step": 2037 }, { "epoch": 0.8583462024427909, "grad_norm": 0.4336085617542267, "learning_rate": 8.98194808544559e-06, "loss": 0.4409, "step": 2038 }, { "epoch": 0.8587673732977678, "grad_norm": 0.42986756563186646, "learning_rate": 8.98046532852822e-06, "loss": 0.5043, "step": 2039 }, { "epoch": 0.8591885441527446, "grad_norm": 0.4464740455150604, "learning_rate": 8.978981615181184e-06, "loss": 0.49, "step": 2040 }, { "epoch": 0.8596097150077214, "grad_norm": 0.47328290343284607, "learning_rate": 8.977496945760989e-06, "loss": 0.5033, "step": 2041 }, { "epoch": 0.8600308858626983, "grad_norm": 0.47700807452201843, "learning_rate": 8.97601132062437e-06, "loss": 0.4817, "step": 2042 }, { "epoch": 0.8604520567176751, "grad_norm": 0.4563295245170593, "learning_rate": 8.974524740128298e-06, "loss": 0.4858, "step": 2043 }, { "epoch": 0.860873227572652, "grad_norm": 0.47375327348709106, "learning_rate": 8.97303720462997e-06, "loss": 0.49, "step": 2044 }, { "epoch": 0.8612943984276288, "grad_norm": 0.49899405241012573, "learning_rate": 8.971548714486811e-06, "loss": 0.4739, "step": 2045 }, { "epoch": 0.8617155692826056, "grad_norm": 0.4481198489665985, "learning_rate": 8.970059270056476e-06, "loss": 0.4766, "step": 2046 }, { "epoch": 0.8621367401375825, "grad_norm": 0.4497876763343811, "learning_rate": 8.968568871696847e-06, "loss": 0.4463, "step": 2047 }, { "epoch": 0.8625579109925593, "grad_norm": 0.5360743999481201, "learning_rate": 8.967077519766045e-06, "loss": 0.5064, "step": 2048 }, { "epoch": 0.8629790818475361, "grad_norm": 0.4273415505886078, "learning_rate": 8.965585214622411e-06, "loss": 0.4641, "step": 2049 }, { "epoch": 0.863400252702513, "grad_norm": 0.4775114059448242, "learning_rate": 8.964091956624516e-06, "loss": 0.4785, "step": 2050 }, { "epoch": 0.8638214235574898, "grad_norm": 0.44742056727409363, "learning_rate": 8.96259774613116e-06, "loss": 0.4665, "step": 2051 }, { "epoch": 0.8642425944124666, "grad_norm": 0.46713048219680786, "learning_rate": 8.961102583501377e-06, "loss": 0.4559, "step": 2052 }, { "epoch": 0.8646637652674435, "grad_norm": 0.448961079120636, "learning_rate": 8.959606469094422e-06, "loss": 0.4804, "step": 2053 }, { "epoch": 0.8650849361224203, "grad_norm": 0.48275259137153625, "learning_rate": 8.958109403269785e-06, "loss": 0.5112, "step": 2054 }, { "epoch": 0.8655061069773972, "grad_norm": 0.45079466700553894, "learning_rate": 8.956611386387184e-06, "loss": 0.4519, "step": 2055 }, { "epoch": 0.865927277832374, "grad_norm": 0.4772886037826538, "learning_rate": 8.95511241880656e-06, "loss": 0.494, "step": 2056 }, { "epoch": 0.8663484486873508, "grad_norm": 0.48762187361717224, "learning_rate": 8.953612500888088e-06, "loss": 0.4791, "step": 2057 }, { "epoch": 0.8667696195423277, "grad_norm": 0.44801953434944153, "learning_rate": 8.95211163299217e-06, "loss": 0.4746, "step": 2058 }, { "epoch": 0.8671907903973045, "grad_norm": 0.4378618001937866, "learning_rate": 8.950609815479433e-06, "loss": 0.4909, "step": 2059 }, { "epoch": 0.8676119612522813, "grad_norm": 0.4578043520450592, "learning_rate": 8.949107048710742e-06, "loss": 0.4732, "step": 2060 }, { "epoch": 0.8680331321072582, "grad_norm": 0.4966736435890198, "learning_rate": 8.947603333047175e-06, "loss": 0.4793, "step": 2061 }, { "epoch": 0.868454302962235, "grad_norm": 0.42368918657302856, "learning_rate": 8.946098668850051e-06, "loss": 0.4507, "step": 2062 }, { "epoch": 0.8688754738172119, "grad_norm": 0.44014424085617065, "learning_rate": 8.94459305648091e-06, "loss": 0.4706, "step": 2063 }, { "epoch": 0.8692966446721887, "grad_norm": 0.4910299777984619, "learning_rate": 8.943086496301523e-06, "loss": 0.4758, "step": 2064 }, { "epoch": 0.8697178155271655, "grad_norm": 0.4451316297054291, "learning_rate": 8.941578988673887e-06, "loss": 0.5107, "step": 2065 }, { "epoch": 0.8701389863821424, "grad_norm": 0.41387519240379333, "learning_rate": 8.940070533960226e-06, "loss": 0.4448, "step": 2066 }, { "epoch": 0.8705601572371192, "grad_norm": 0.45618370175361633, "learning_rate": 8.938561132522997e-06, "loss": 0.4861, "step": 2067 }, { "epoch": 0.870981328092096, "grad_norm": 0.3971850574016571, "learning_rate": 8.937050784724874e-06, "loss": 0.446, "step": 2068 }, { "epoch": 0.8714024989470729, "grad_norm": 0.536045491695404, "learning_rate": 8.93553949092877e-06, "loss": 0.5174, "step": 2069 }, { "epoch": 0.8718236698020497, "grad_norm": 0.48977214097976685, "learning_rate": 8.934027251497818e-06, "loss": 0.466, "step": 2070 }, { "epoch": 0.8722448406570266, "grad_norm": 0.4507426619529724, "learning_rate": 8.93251406679538e-06, "loss": 0.4786, "step": 2071 }, { "epoch": 0.8726660115120034, "grad_norm": 0.49123117327690125, "learning_rate": 8.930999937185046e-06, "loss": 0.5147, "step": 2072 }, { "epoch": 0.8730871823669802, "grad_norm": 0.47335997223854065, "learning_rate": 8.929484863030631e-06, "loss": 0.4554, "step": 2073 }, { "epoch": 0.8735083532219571, "grad_norm": 0.41565749049186707, "learning_rate": 8.927968844696179e-06, "loss": 0.4778, "step": 2074 }, { "epoch": 0.8739295240769339, "grad_norm": 0.4280366003513336, "learning_rate": 8.926451882545962e-06, "loss": 0.4931, "step": 2075 }, { "epoch": 0.8743506949319108, "grad_norm": 0.5149115324020386, "learning_rate": 8.924933976944474e-06, "loss": 0.4801, "step": 2076 }, { "epoch": 0.8747718657868876, "grad_norm": 0.42649513483047485, "learning_rate": 8.92341512825644e-06, "loss": 0.4753, "step": 2077 }, { "epoch": 0.8751930366418644, "grad_norm": 0.41391825675964355, "learning_rate": 8.921895336846814e-06, "loss": 0.4788, "step": 2078 }, { "epoch": 0.8756142074968413, "grad_norm": 0.46750977635383606, "learning_rate": 8.920374603080764e-06, "loss": 0.4955, "step": 2079 }, { "epoch": 0.8760353783518181, "grad_norm": 0.44684118032455444, "learning_rate": 8.918852927323702e-06, "loss": 0.5066, "step": 2080 }, { "epoch": 0.8764565492067949, "grad_norm": 0.4626574218273163, "learning_rate": 8.917330309941253e-06, "loss": 0.462, "step": 2081 }, { "epoch": 0.8768777200617717, "grad_norm": 0.46402493119239807, "learning_rate": 8.915806751299274e-06, "loss": 0.4829, "step": 2082 }, { "epoch": 0.8772988909167485, "grad_norm": 0.5034680962562561, "learning_rate": 8.914282251763848e-06, "loss": 0.4844, "step": 2083 }, { "epoch": 0.8777200617717253, "grad_norm": 0.44258493185043335, "learning_rate": 8.91275681170128e-06, "loss": 0.5045, "step": 2084 }, { "epoch": 0.8781412326267022, "grad_norm": 0.48845288157463074, "learning_rate": 8.911230431478108e-06, "loss": 0.5105, "step": 2085 }, { "epoch": 0.878562403481679, "grad_norm": 0.43015560507774353, "learning_rate": 8.909703111461088e-06, "loss": 0.4568, "step": 2086 }, { "epoch": 0.8789835743366559, "grad_norm": 0.45377230644226074, "learning_rate": 8.908174852017209e-06, "loss": 0.4515, "step": 2087 }, { "epoch": 0.8794047451916327, "grad_norm": 0.4362310469150543, "learning_rate": 8.906645653513681e-06, "loss": 0.4489, "step": 2088 }, { "epoch": 0.8798259160466095, "grad_norm": 0.4064888060092926, "learning_rate": 8.905115516317942e-06, "loss": 0.4302, "step": 2089 }, { "epoch": 0.8802470869015864, "grad_norm": 0.4214572012424469, "learning_rate": 8.903584440797652e-06, "loss": 0.5, "step": 2090 }, { "epoch": 0.8806682577565632, "grad_norm": 0.47624263167381287, "learning_rate": 8.902052427320705e-06, "loss": 0.4663, "step": 2091 }, { "epoch": 0.88108942861154, "grad_norm": 0.4501442611217499, "learning_rate": 8.900519476255207e-06, "loss": 0.4802, "step": 2092 }, { "epoch": 0.8815105994665169, "grad_norm": 0.4506746232509613, "learning_rate": 8.898985587969502e-06, "loss": 0.4908, "step": 2093 }, { "epoch": 0.8819317703214937, "grad_norm": 0.45782095193862915, "learning_rate": 8.897450762832154e-06, "loss": 0.4563, "step": 2094 }, { "epoch": 0.8823529411764706, "grad_norm": 0.4786173403263092, "learning_rate": 8.895915001211948e-06, "loss": 0.4723, "step": 2095 }, { "epoch": 0.8827741120314474, "grad_norm": 0.518242359161377, "learning_rate": 8.894378303477902e-06, "loss": 0.4752, "step": 2096 }, { "epoch": 0.8831952828864242, "grad_norm": 0.45923012495040894, "learning_rate": 8.892840669999255e-06, "loss": 0.4735, "step": 2097 }, { "epoch": 0.8836164537414011, "grad_norm": 0.6489816904067993, "learning_rate": 8.89130210114547e-06, "loss": 0.4918, "step": 2098 }, { "epoch": 0.8840376245963779, "grad_norm": 0.48492562770843506, "learning_rate": 8.889762597286233e-06, "loss": 0.5112, "step": 2099 }, { "epoch": 0.8844587954513548, "grad_norm": 0.5190897583961487, "learning_rate": 8.888222158791462e-06, "loss": 0.5185, "step": 2100 }, { "epoch": 0.8848799663063316, "grad_norm": 0.5371636152267456, "learning_rate": 8.886680786031292e-06, "loss": 0.491, "step": 2101 }, { "epoch": 0.8853011371613084, "grad_norm": 0.44500967860221863, "learning_rate": 8.885138479376087e-06, "loss": 0.4691, "step": 2102 }, { "epoch": 0.8857223080162853, "grad_norm": 0.4203650951385498, "learning_rate": 8.883595239196431e-06, "loss": 0.4754, "step": 2103 }, { "epoch": 0.8861434788712621, "grad_norm": 0.4941357374191284, "learning_rate": 8.88205106586314e-06, "loss": 0.4884, "step": 2104 }, { "epoch": 0.8865646497262389, "grad_norm": 0.47652238607406616, "learning_rate": 8.880505959747245e-06, "loss": 0.461, "step": 2105 }, { "epoch": 0.8869858205812158, "grad_norm": 0.4845276474952698, "learning_rate": 8.878959921220005e-06, "loss": 0.5083, "step": 2106 }, { "epoch": 0.8874069914361926, "grad_norm": 0.49782896041870117, "learning_rate": 8.877412950652907e-06, "loss": 0.4893, "step": 2107 }, { "epoch": 0.8878281622911695, "grad_norm": 0.4891113042831421, "learning_rate": 8.875865048417657e-06, "loss": 0.4778, "step": 2108 }, { "epoch": 0.8882493331461463, "grad_norm": 0.43920233845710754, "learning_rate": 8.874316214886185e-06, "loss": 0.4899, "step": 2109 }, { "epoch": 0.8886705040011231, "grad_norm": 0.4586823880672455, "learning_rate": 8.872766450430647e-06, "loss": 0.4792, "step": 2110 }, { "epoch": 0.8890916748561, "grad_norm": 0.4892849624156952, "learning_rate": 8.871215755423422e-06, "loss": 0.4898, "step": 2111 }, { "epoch": 0.8895128457110768, "grad_norm": 0.49404770135879517, "learning_rate": 8.869664130237112e-06, "loss": 0.4728, "step": 2112 }, { "epoch": 0.8899340165660536, "grad_norm": 0.44157418608665466, "learning_rate": 8.868111575244543e-06, "loss": 0.4606, "step": 2113 }, { "epoch": 0.8903551874210305, "grad_norm": 0.4824812412261963, "learning_rate": 8.866558090818765e-06, "loss": 0.4701, "step": 2114 }, { "epoch": 0.8907763582760073, "grad_norm": 0.4540882706642151, "learning_rate": 8.865003677333048e-06, "loss": 0.4375, "step": 2115 }, { "epoch": 0.8911975291309842, "grad_norm": 0.4555933177471161, "learning_rate": 8.86344833516089e-06, "loss": 0.5399, "step": 2116 }, { "epoch": 0.891618699985961, "grad_norm": 0.4483009874820709, "learning_rate": 8.86189206467601e-06, "loss": 0.4889, "step": 2117 }, { "epoch": 0.8920398708409378, "grad_norm": 0.4683294892311096, "learning_rate": 8.86033486625235e-06, "loss": 0.4885, "step": 2118 }, { "epoch": 0.8924610416959147, "grad_norm": 0.5074185132980347, "learning_rate": 8.858776740264075e-06, "loss": 0.4997, "step": 2119 }, { "epoch": 0.8928822125508915, "grad_norm": 0.479703813791275, "learning_rate": 8.857217687085571e-06, "loss": 0.4813, "step": 2120 }, { "epoch": 0.8933033834058683, "grad_norm": 0.4071308672428131, "learning_rate": 8.855657707091452e-06, "loss": 0.4819, "step": 2121 }, { "epoch": 0.8937245542608452, "grad_norm": 0.47727707028388977, "learning_rate": 8.854096800656548e-06, "loss": 0.4772, "step": 2122 }, { "epoch": 0.894145725115822, "grad_norm": 0.49604249000549316, "learning_rate": 8.852534968155918e-06, "loss": 0.4935, "step": 2123 }, { "epoch": 0.8945668959707989, "grad_norm": 0.4166507124900818, "learning_rate": 8.850972209964837e-06, "loss": 0.4664, "step": 2124 }, { "epoch": 0.8949880668257757, "grad_norm": 0.4490072727203369, "learning_rate": 8.849408526458809e-06, "loss": 0.5134, "step": 2125 }, { "epoch": 0.8954092376807525, "grad_norm": 0.4396776854991913, "learning_rate": 8.847843918013556e-06, "loss": 0.4629, "step": 2126 }, { "epoch": 0.8958304085357294, "grad_norm": 0.48188525438308716, "learning_rate": 8.846278385005023e-06, "loss": 0.4687, "step": 2127 }, { "epoch": 0.8962515793907062, "grad_norm": 0.4509187340736389, "learning_rate": 8.844711927809377e-06, "loss": 0.4965, "step": 2128 }, { "epoch": 0.896672750245683, "grad_norm": 0.4357057213783264, "learning_rate": 8.84314454680301e-06, "loss": 0.4833, "step": 2129 }, { "epoch": 0.8970939211006599, "grad_norm": 0.46263501048088074, "learning_rate": 8.841576242362535e-06, "loss": 0.4857, "step": 2130 }, { "epoch": 0.8975150919556367, "grad_norm": 0.495802640914917, "learning_rate": 8.84000701486478e-06, "loss": 0.5089, "step": 2131 }, { "epoch": 0.8979362628106135, "grad_norm": 0.46324622631073, "learning_rate": 8.838436864686803e-06, "loss": 0.4847, "step": 2132 }, { "epoch": 0.8983574336655903, "grad_norm": 0.4665319323539734, "learning_rate": 8.836865792205883e-06, "loss": 0.5461, "step": 2133 }, { "epoch": 0.8987786045205671, "grad_norm": 0.4965471923351288, "learning_rate": 8.835293797799517e-06, "loss": 0.4947, "step": 2134 }, { "epoch": 0.899199775375544, "grad_norm": 0.4445795714855194, "learning_rate": 8.833720881845425e-06, "loss": 0.4992, "step": 2135 }, { "epoch": 0.8996209462305208, "grad_norm": 0.48683586716651917, "learning_rate": 8.832147044721549e-06, "loss": 0.4992, "step": 2136 }, { "epoch": 0.9000421170854976, "grad_norm": 0.4320569932460785, "learning_rate": 8.830572286806052e-06, "loss": 0.465, "step": 2137 }, { "epoch": 0.9004632879404745, "grad_norm": 0.44253572821617126, "learning_rate": 8.828996608477318e-06, "loss": 0.4939, "step": 2138 }, { "epoch": 0.9008844587954513, "grad_norm": 0.4848971366882324, "learning_rate": 8.827420010113952e-06, "loss": 0.491, "step": 2139 }, { "epoch": 0.9013056296504282, "grad_norm": 0.43713873624801636, "learning_rate": 8.825842492094781e-06, "loss": 0.4555, "step": 2140 }, { "epoch": 0.901726800505405, "grad_norm": 0.44368067383766174, "learning_rate": 8.824264054798852e-06, "loss": 0.4697, "step": 2141 }, { "epoch": 0.9021479713603818, "grad_norm": 0.47087860107421875, "learning_rate": 8.822684698605433e-06, "loss": 0.4876, "step": 2142 }, { "epoch": 0.9025691422153587, "grad_norm": 0.4712139964103699, "learning_rate": 8.821104423894015e-06, "loss": 0.486, "step": 2143 }, { "epoch": 0.9029903130703355, "grad_norm": 0.4994378983974457, "learning_rate": 8.819523231044305e-06, "loss": 0.4737, "step": 2144 }, { "epoch": 0.9034114839253123, "grad_norm": 0.4611441493034363, "learning_rate": 8.817941120436238e-06, "loss": 0.4735, "step": 2145 }, { "epoch": 0.9038326547802892, "grad_norm": 0.48226290941238403, "learning_rate": 8.816358092449957e-06, "loss": 0.4937, "step": 2146 }, { "epoch": 0.904253825635266, "grad_norm": 0.42696529626846313, "learning_rate": 8.81477414746584e-06, "loss": 0.4659, "step": 2147 }, { "epoch": 0.9046749964902429, "grad_norm": 0.444555401802063, "learning_rate": 8.813189285864479e-06, "loss": 0.4483, "step": 2148 }, { "epoch": 0.9050961673452197, "grad_norm": 0.48609107732772827, "learning_rate": 8.81160350802668e-06, "loss": 0.4815, "step": 2149 }, { "epoch": 0.9055173382001965, "grad_norm": 0.4767249822616577, "learning_rate": 8.810016814333482e-06, "loss": 0.4929, "step": 2150 }, { "epoch": 0.9059385090551734, "grad_norm": 0.46809589862823486, "learning_rate": 8.808429205166131e-06, "loss": 0.4546, "step": 2151 }, { "epoch": 0.9063596799101502, "grad_norm": 0.4402042627334595, "learning_rate": 8.806840680906103e-06, "loss": 0.4636, "step": 2152 }, { "epoch": 0.906780850765127, "grad_norm": 0.4884268045425415, "learning_rate": 8.80525124193509e-06, "loss": 0.5141, "step": 2153 }, { "epoch": 0.9072020216201039, "grad_norm": 0.4865548312664032, "learning_rate": 8.803660888635002e-06, "loss": 0.4654, "step": 2154 }, { "epoch": 0.9076231924750807, "grad_norm": 0.50327467918396, "learning_rate": 8.80206962138797e-06, "loss": 0.4947, "step": 2155 }, { "epoch": 0.9080443633300576, "grad_norm": 0.4872549772262573, "learning_rate": 8.800477440576348e-06, "loss": 0.4953, "step": 2156 }, { "epoch": 0.9084655341850344, "grad_norm": 0.49242129921913147, "learning_rate": 8.798884346582703e-06, "loss": 0.5071, "step": 2157 }, { "epoch": 0.9088867050400112, "grad_norm": 0.4815231263637543, "learning_rate": 8.797290339789827e-06, "loss": 0.4618, "step": 2158 }, { "epoch": 0.9093078758949881, "grad_norm": 0.540748655796051, "learning_rate": 8.795695420580733e-06, "loss": 0.486, "step": 2159 }, { "epoch": 0.9097290467499649, "grad_norm": 0.4837559163570404, "learning_rate": 8.794099589338642e-06, "loss": 0.496, "step": 2160 }, { "epoch": 0.9101502176049417, "grad_norm": 0.44035258889198303, "learning_rate": 8.792502846447007e-06, "loss": 0.4281, "step": 2161 }, { "epoch": 0.9105713884599186, "grad_norm": 0.4931212067604065, "learning_rate": 8.790905192289492e-06, "loss": 0.5102, "step": 2162 }, { "epoch": 0.9109925593148954, "grad_norm": 0.480918824672699, "learning_rate": 8.789306627249985e-06, "loss": 0.4684, "step": 2163 }, { "epoch": 0.9114137301698723, "grad_norm": 0.4887574315071106, "learning_rate": 8.787707151712588e-06, "loss": 0.5097, "step": 2164 }, { "epoch": 0.9118349010248491, "grad_norm": 0.562170147895813, "learning_rate": 8.786106766061629e-06, "loss": 0.533, "step": 2165 }, { "epoch": 0.9122560718798259, "grad_norm": 0.4587405025959015, "learning_rate": 8.784505470681644e-06, "loss": 0.4856, "step": 2166 }, { "epoch": 0.9126772427348028, "grad_norm": 0.4619036018848419, "learning_rate": 8.782903265957398e-06, "loss": 0.4849, "step": 2167 }, { "epoch": 0.9130984135897796, "grad_norm": 0.5511062741279602, "learning_rate": 8.781300152273868e-06, "loss": 0.5081, "step": 2168 }, { "epoch": 0.9135195844447564, "grad_norm": 0.5170078873634338, "learning_rate": 8.779696130016253e-06, "loss": 0.455, "step": 2169 }, { "epoch": 0.9139407552997333, "grad_norm": 0.4518997073173523, "learning_rate": 8.77809119956997e-06, "loss": 0.4736, "step": 2170 }, { "epoch": 0.9143619261547101, "grad_norm": 0.4249887764453888, "learning_rate": 8.776485361320647e-06, "loss": 0.4688, "step": 2171 }, { "epoch": 0.914783097009687, "grad_norm": 0.49037083983421326, "learning_rate": 8.774878615654144e-06, "loss": 0.4908, "step": 2172 }, { "epoch": 0.9152042678646638, "grad_norm": 0.48783934116363525, "learning_rate": 8.773270962956527e-06, "loss": 0.4924, "step": 2173 }, { "epoch": 0.9156254387196406, "grad_norm": 0.5265381336212158, "learning_rate": 8.771662403614085e-06, "loss": 0.5076, "step": 2174 }, { "epoch": 0.9160466095746175, "grad_norm": 0.49255144596099854, "learning_rate": 8.770052938013323e-06, "loss": 0.5074, "step": 2175 }, { "epoch": 0.9164677804295943, "grad_norm": 0.4336250126361847, "learning_rate": 8.768442566540967e-06, "loss": 0.4581, "step": 2176 }, { "epoch": 0.9168889512845712, "grad_norm": 0.5518732666969299, "learning_rate": 8.766831289583956e-06, "loss": 0.4724, "step": 2177 }, { "epoch": 0.917310122139548, "grad_norm": 0.5205866694450378, "learning_rate": 8.765219107529451e-06, "loss": 0.4664, "step": 2178 }, { "epoch": 0.9177312929945248, "grad_norm": 0.4125896692276001, "learning_rate": 8.763606020764828e-06, "loss": 0.4414, "step": 2179 }, { "epoch": 0.9181524638495017, "grad_norm": 0.45991894602775574, "learning_rate": 8.761992029677681e-06, "loss": 0.4933, "step": 2180 }, { "epoch": 0.9185736347044785, "grad_norm": 0.48181480169296265, "learning_rate": 8.76037713465582e-06, "loss": 0.4865, "step": 2181 }, { "epoch": 0.9189948055594553, "grad_norm": 0.4674176871776581, "learning_rate": 8.758761336087274e-06, "loss": 0.5113, "step": 2182 }, { "epoch": 0.9194159764144321, "grad_norm": 0.4401403069496155, "learning_rate": 8.75714463436029e-06, "loss": 0.4895, "step": 2183 }, { "epoch": 0.9198371472694089, "grad_norm": 0.4537390470504761, "learning_rate": 8.755527029863327e-06, "loss": 0.4667, "step": 2184 }, { "epoch": 0.9202583181243857, "grad_norm": 0.4921032190322876, "learning_rate": 8.753908522985068e-06, "loss": 0.4546, "step": 2185 }, { "epoch": 0.9206794889793626, "grad_norm": 0.44763901829719543, "learning_rate": 8.752289114114407e-06, "loss": 0.4653, "step": 2186 }, { "epoch": 0.9211006598343394, "grad_norm": 0.5183508992195129, "learning_rate": 8.750668803640459e-06, "loss": 0.4801, "step": 2187 }, { "epoch": 0.9215218306893163, "grad_norm": 0.4320163428783417, "learning_rate": 8.74904759195255e-06, "loss": 0.4272, "step": 2188 }, { "epoch": 0.9219430015442931, "grad_norm": 0.42717304825782776, "learning_rate": 8.747425479440232e-06, "loss": 0.4865, "step": 2189 }, { "epoch": 0.9223641723992699, "grad_norm": 0.44986018538475037, "learning_rate": 8.745802466493262e-06, "loss": 0.4926, "step": 2190 }, { "epoch": 0.9227853432542468, "grad_norm": 0.4216229319572449, "learning_rate": 8.744178553501623e-06, "loss": 0.4949, "step": 2191 }, { "epoch": 0.9232065141092236, "grad_norm": 0.47998976707458496, "learning_rate": 8.742553740855507e-06, "loss": 0.4695, "step": 2192 }, { "epoch": 0.9236276849642004, "grad_norm": 0.4417094588279724, "learning_rate": 8.740928028945326e-06, "loss": 0.502, "step": 2193 }, { "epoch": 0.9240488558191773, "grad_norm": 0.411511093378067, "learning_rate": 8.73930141816171e-06, "loss": 0.4619, "step": 2194 }, { "epoch": 0.9244700266741541, "grad_norm": 0.4599991738796234, "learning_rate": 8.7376739088955e-06, "loss": 0.492, "step": 2195 }, { "epoch": 0.924891197529131, "grad_norm": 0.5002931356430054, "learning_rate": 8.736045501537755e-06, "loss": 0.4802, "step": 2196 }, { "epoch": 0.9253123683841078, "grad_norm": 0.4583019018173218, "learning_rate": 8.73441619647975e-06, "loss": 0.4971, "step": 2197 }, { "epoch": 0.9257335392390846, "grad_norm": 0.43435606360435486, "learning_rate": 8.73278599411298e-06, "loss": 0.4903, "step": 2198 }, { "epoch": 0.9261547100940615, "grad_norm": 0.43863770365715027, "learning_rate": 8.731154894829147e-06, "loss": 0.4653, "step": 2199 }, { "epoch": 0.9265758809490383, "grad_norm": 0.42978519201278687, "learning_rate": 8.729522899020176e-06, "loss": 0.484, "step": 2200 }, { "epoch": 0.9269970518040151, "grad_norm": 0.43098941445350647, "learning_rate": 8.7278900070782e-06, "loss": 0.4705, "step": 2201 }, { "epoch": 0.927418222658992, "grad_norm": 0.5080531239509583, "learning_rate": 8.726256219395577e-06, "loss": 0.5464, "step": 2202 }, { "epoch": 0.9278393935139688, "grad_norm": 0.4450795650482178, "learning_rate": 8.72462153636487e-06, "loss": 0.5022, "step": 2203 }, { "epoch": 0.9282605643689457, "grad_norm": 0.4723255932331085, "learning_rate": 8.722985958378865e-06, "loss": 0.5528, "step": 2204 }, { "epoch": 0.9286817352239225, "grad_norm": 0.45968520641326904, "learning_rate": 8.72134948583056e-06, "loss": 0.5113, "step": 2205 }, { "epoch": 0.9291029060788993, "grad_norm": 0.4729650616645813, "learning_rate": 8.719712119113168e-06, "loss": 0.4827, "step": 2206 }, { "epoch": 0.9295240769338762, "grad_norm": 0.4250435531139374, "learning_rate": 8.718073858620116e-06, "loss": 0.471, "step": 2207 }, { "epoch": 0.929945247788853, "grad_norm": 0.4268702566623688, "learning_rate": 8.716434704745047e-06, "loss": 0.4466, "step": 2208 }, { "epoch": 0.9303664186438299, "grad_norm": 0.500275194644928, "learning_rate": 8.714794657881818e-06, "loss": 0.5374, "step": 2209 }, { "epoch": 0.9307875894988067, "grad_norm": 0.41721776127815247, "learning_rate": 8.713153718424502e-06, "loss": 0.442, "step": 2210 }, { "epoch": 0.9312087603537835, "grad_norm": 0.4628480076789856, "learning_rate": 8.711511886767386e-06, "loss": 0.4781, "step": 2211 }, { "epoch": 0.9316299312087604, "grad_norm": 0.4378197193145752, "learning_rate": 8.709869163304967e-06, "loss": 0.4845, "step": 2212 }, { "epoch": 0.9320511020637372, "grad_norm": 0.4831106662750244, "learning_rate": 8.708225548431964e-06, "loss": 0.5508, "step": 2213 }, { "epoch": 0.932472272918714, "grad_norm": 0.46507665514945984, "learning_rate": 8.706581042543304e-06, "loss": 0.4682, "step": 2214 }, { "epoch": 0.9328934437736909, "grad_norm": 0.4509824514389038, "learning_rate": 8.704935646034132e-06, "loss": 0.4506, "step": 2215 }, { "epoch": 0.9333146146286677, "grad_norm": 0.45953792333602905, "learning_rate": 8.703289359299802e-06, "loss": 0.5081, "step": 2216 }, { "epoch": 0.9337357854836446, "grad_norm": 0.4342181980609894, "learning_rate": 8.70164218273589e-06, "loss": 0.4899, "step": 2217 }, { "epoch": 0.9341569563386214, "grad_norm": 0.4991154074668884, "learning_rate": 8.699994116738176e-06, "loss": 0.4663, "step": 2218 }, { "epoch": 0.9345781271935982, "grad_norm": 0.4139828383922577, "learning_rate": 8.698345161702663e-06, "loss": 0.4815, "step": 2219 }, { "epoch": 0.9349992980485751, "grad_norm": 0.5325157046318054, "learning_rate": 8.696695318025561e-06, "loss": 0.5185, "step": 2220 }, { "epoch": 0.9354204689035519, "grad_norm": 0.44342249631881714, "learning_rate": 8.695044586103297e-06, "loss": 0.4928, "step": 2221 }, { "epoch": 0.9358416397585287, "grad_norm": 0.4345646798610687, "learning_rate": 8.693392966332508e-06, "loss": 0.4664, "step": 2222 }, { "epoch": 0.9362628106135056, "grad_norm": 0.4664803743362427, "learning_rate": 8.69174045911005e-06, "loss": 0.4545, "step": 2223 }, { "epoch": 0.9366839814684824, "grad_norm": 0.4241390526294708, "learning_rate": 8.690087064832985e-06, "loss": 0.5052, "step": 2224 }, { "epoch": 0.9371051523234593, "grad_norm": 0.4643252491950989, "learning_rate": 8.688432783898597e-06, "loss": 0.4848, "step": 2225 }, { "epoch": 0.9375263231784361, "grad_norm": 0.49647724628448486, "learning_rate": 8.686777616704375e-06, "loss": 0.4884, "step": 2226 }, { "epoch": 0.9379474940334129, "grad_norm": 0.44421955943107605, "learning_rate": 8.685121563648024e-06, "loss": 0.4694, "step": 2227 }, { "epoch": 0.9383686648883898, "grad_norm": 0.5071961879730225, "learning_rate": 8.683464625127463e-06, "loss": 0.508, "step": 2228 }, { "epoch": 0.9387898357433666, "grad_norm": 0.4694962501525879, "learning_rate": 8.681806801540819e-06, "loss": 0.5233, "step": 2229 }, { "epoch": 0.9392110065983434, "grad_norm": 0.4410741925239563, "learning_rate": 8.68014809328644e-06, "loss": 0.4528, "step": 2230 }, { "epoch": 0.9396321774533203, "grad_norm": 0.47533079981803894, "learning_rate": 8.678488500762879e-06, "loss": 0.4807, "step": 2231 }, { "epoch": 0.9400533483082971, "grad_norm": 0.5002363324165344, "learning_rate": 8.676828024368907e-06, "loss": 0.5077, "step": 2232 }, { "epoch": 0.9404745191632738, "grad_norm": 0.44979649782180786, "learning_rate": 8.6751666645035e-06, "loss": 0.4788, "step": 2233 }, { "epoch": 0.9408956900182507, "grad_norm": 0.5170558094978333, "learning_rate": 8.673504421565857e-06, "loss": 0.4774, "step": 2234 }, { "epoch": 0.9413168608732275, "grad_norm": 0.4326324164867401, "learning_rate": 8.67184129595538e-06, "loss": 0.5182, "step": 2235 }, { "epoch": 0.9417380317282044, "grad_norm": 0.42369022965431213, "learning_rate": 8.670177288071685e-06, "loss": 0.5362, "step": 2236 }, { "epoch": 0.9421592025831812, "grad_norm": 0.4745965003967285, "learning_rate": 8.668512398314604e-06, "loss": 0.4878, "step": 2237 }, { "epoch": 0.942580373438158, "grad_norm": 0.4560614228248596, "learning_rate": 8.666846627084175e-06, "loss": 0.5096, "step": 2238 }, { "epoch": 0.9430015442931349, "grad_norm": 0.46014511585235596, "learning_rate": 8.665179974780653e-06, "loss": 0.4733, "step": 2239 }, { "epoch": 0.9434227151481117, "grad_norm": 0.45308637619018555, "learning_rate": 8.663512441804505e-06, "loss": 0.4924, "step": 2240 }, { "epoch": 0.9438438860030886, "grad_norm": 0.44905081391334534, "learning_rate": 8.661844028556404e-06, "loss": 0.5086, "step": 2241 }, { "epoch": 0.9442650568580654, "grad_norm": 0.48636841773986816, "learning_rate": 8.660174735437237e-06, "loss": 0.4712, "step": 2242 }, { "epoch": 0.9446862277130422, "grad_norm": 0.42570704221725464, "learning_rate": 8.658504562848104e-06, "loss": 0.4402, "step": 2243 }, { "epoch": 0.9451073985680191, "grad_norm": 0.40875452756881714, "learning_rate": 8.656833511190318e-06, "loss": 0.4497, "step": 2244 }, { "epoch": 0.9455285694229959, "grad_norm": 0.46223530173301697, "learning_rate": 8.655161580865398e-06, "loss": 0.4751, "step": 2245 }, { "epoch": 0.9459497402779727, "grad_norm": 0.4450231194496155, "learning_rate": 8.65348877227508e-06, "loss": 0.4791, "step": 2246 }, { "epoch": 0.9463709111329496, "grad_norm": 0.4567493796348572, "learning_rate": 8.651815085821304e-06, "loss": 0.4743, "step": 2247 }, { "epoch": 0.9467920819879264, "grad_norm": 0.452087938785553, "learning_rate": 8.650140521906226e-06, "loss": 0.4724, "step": 2248 }, { "epoch": 0.9472132528429033, "grad_norm": 0.44262996315956116, "learning_rate": 8.648465080932214e-06, "loss": 0.4744, "step": 2249 }, { "epoch": 0.9476344236978801, "grad_norm": 0.4303051233291626, "learning_rate": 8.646788763301842e-06, "loss": 0.4613, "step": 2250 }, { "epoch": 0.9480555945528569, "grad_norm": 0.46026816964149475, "learning_rate": 8.645111569417897e-06, "loss": 0.4991, "step": 2251 }, { "epoch": 0.9484767654078338, "grad_norm": 0.43980714678764343, "learning_rate": 8.643433499683378e-06, "loss": 0.475, "step": 2252 }, { "epoch": 0.9488979362628106, "grad_norm": 0.44249534606933594, "learning_rate": 8.641754554501494e-06, "loss": 0.4605, "step": 2253 }, { "epoch": 0.9493191071177874, "grad_norm": 0.42479923367500305, "learning_rate": 8.640074734275658e-06, "loss": 0.4622, "step": 2254 }, { "epoch": 0.9497402779727643, "grad_norm": 0.4326286315917969, "learning_rate": 8.638394039409506e-06, "loss": 0.4952, "step": 2255 }, { "epoch": 0.9501614488277411, "grad_norm": 0.464530348777771, "learning_rate": 8.636712470306871e-06, "loss": 0.4641, "step": 2256 }, { "epoch": 0.950582619682718, "grad_norm": 0.42355838418006897, "learning_rate": 8.635030027371805e-06, "loss": 0.4394, "step": 2257 }, { "epoch": 0.9510037905376948, "grad_norm": 0.4191844165325165, "learning_rate": 8.633346711008567e-06, "loss": 0.4694, "step": 2258 }, { "epoch": 0.9514249613926716, "grad_norm": 0.43731072545051575, "learning_rate": 8.631662521621624e-06, "loss": 0.4481, "step": 2259 }, { "epoch": 0.9518461322476485, "grad_norm": 0.45692428946495056, "learning_rate": 8.629977459615655e-06, "loss": 0.4812, "step": 2260 }, { "epoch": 0.9522673031026253, "grad_norm": 0.4800492823123932, "learning_rate": 8.628291525395549e-06, "loss": 0.503, "step": 2261 }, { "epoch": 0.9526884739576021, "grad_norm": 0.4157460331916809, "learning_rate": 8.626604719366402e-06, "loss": 0.4335, "step": 2262 }, { "epoch": 0.953109644812579, "grad_norm": 0.5320693254470825, "learning_rate": 8.624917041933524e-06, "loss": 0.4869, "step": 2263 }, { "epoch": 0.9535308156675558, "grad_norm": 0.4345862865447998, "learning_rate": 8.62322849350243e-06, "loss": 0.4407, "step": 2264 }, { "epoch": 0.9539519865225327, "grad_norm": 0.44916409254074097, "learning_rate": 8.621539074478843e-06, "loss": 0.4852, "step": 2265 }, { "epoch": 0.9543731573775095, "grad_norm": 0.4703916013240814, "learning_rate": 8.619848785268704e-06, "loss": 0.4752, "step": 2266 }, { "epoch": 0.9547943282324863, "grad_norm": 0.45977282524108887, "learning_rate": 8.618157626278152e-06, "loss": 0.5186, "step": 2267 }, { "epoch": 0.9552154990874632, "grad_norm": 0.4416882395744324, "learning_rate": 8.616465597913543e-06, "loss": 0.4875, "step": 2268 }, { "epoch": 0.95563666994244, "grad_norm": 0.4532122015953064, "learning_rate": 8.61477270058144e-06, "loss": 0.4899, "step": 2269 }, { "epoch": 0.9560578407974168, "grad_norm": 0.48470816016197205, "learning_rate": 8.61307893468861e-06, "loss": 0.4961, "step": 2270 }, { "epoch": 0.9564790116523937, "grad_norm": 0.5009462237358093, "learning_rate": 8.611384300642034e-06, "loss": 0.4777, "step": 2271 }, { "epoch": 0.9569001825073705, "grad_norm": 0.4617394506931305, "learning_rate": 8.609688798848902e-06, "loss": 0.5014, "step": 2272 }, { "epoch": 0.9573213533623474, "grad_norm": 0.4398263692855835, "learning_rate": 8.60799242971661e-06, "loss": 0.4899, "step": 2273 }, { "epoch": 0.9577425242173242, "grad_norm": 0.5121272802352905, "learning_rate": 8.606295193652762e-06, "loss": 0.4975, "step": 2274 }, { "epoch": 0.958163695072301, "grad_norm": 0.4578995108604431, "learning_rate": 8.604597091065172e-06, "loss": 0.4892, "step": 2275 }, { "epoch": 0.9585848659272779, "grad_norm": 0.4346058964729309, "learning_rate": 8.602898122361861e-06, "loss": 0.4814, "step": 2276 }, { "epoch": 0.9590060367822547, "grad_norm": 0.4807288646697998, "learning_rate": 8.601198287951059e-06, "loss": 0.4987, "step": 2277 }, { "epoch": 0.9594272076372315, "grad_norm": 0.4530472755432129, "learning_rate": 8.599497588241203e-06, "loss": 0.475, "step": 2278 }, { "epoch": 0.9598483784922084, "grad_norm": 0.4413120150566101, "learning_rate": 8.59779602364094e-06, "loss": 0.4743, "step": 2279 }, { "epoch": 0.9602695493471852, "grad_norm": 0.4438486397266388, "learning_rate": 8.596093594559126e-06, "loss": 0.4733, "step": 2280 }, { "epoch": 0.9606907202021621, "grad_norm": 0.5163366198539734, "learning_rate": 8.594390301404815e-06, "loss": 0.5104, "step": 2281 }, { "epoch": 0.9611118910571389, "grad_norm": 0.46202152967453003, "learning_rate": 8.59268614458728e-06, "loss": 0.4938, "step": 2282 }, { "epoch": 0.9615330619121156, "grad_norm": 0.44006985425949097, "learning_rate": 8.590981124515997e-06, "loss": 0.5055, "step": 2283 }, { "epoch": 0.9619542327670925, "grad_norm": 0.46161967515945435, "learning_rate": 8.589275241600652e-06, "loss": 0.4892, "step": 2284 }, { "epoch": 0.9623754036220693, "grad_norm": 0.43989089131355286, "learning_rate": 8.587568496251133e-06, "loss": 0.508, "step": 2285 }, { "epoch": 0.9627965744770461, "grad_norm": 0.4114464521408081, "learning_rate": 8.585860888877538e-06, "loss": 0.4724, "step": 2286 }, { "epoch": 0.963217745332023, "grad_norm": 0.43635842204093933, "learning_rate": 8.584152419890173e-06, "loss": 0.4819, "step": 2287 }, { "epoch": 0.9636389161869998, "grad_norm": 0.4270971119403839, "learning_rate": 8.582443089699551e-06, "loss": 0.4501, "step": 2288 }, { "epoch": 0.9640600870419767, "grad_norm": 0.4620331823825836, "learning_rate": 8.58073289871639e-06, "loss": 0.4998, "step": 2289 }, { "epoch": 0.9644812578969535, "grad_norm": 0.47025737166404724, "learning_rate": 8.57902184735162e-06, "loss": 0.4771, "step": 2290 }, { "epoch": 0.9649024287519303, "grad_norm": 0.44782549142837524, "learning_rate": 8.577309936016369e-06, "loss": 0.4612, "step": 2291 }, { "epoch": 0.9653235996069072, "grad_norm": 0.452450156211853, "learning_rate": 8.57559716512198e-06, "loss": 0.4861, "step": 2292 }, { "epoch": 0.965744770461884, "grad_norm": 0.5240190625190735, "learning_rate": 8.573883535079997e-06, "loss": 0.5311, "step": 2293 }, { "epoch": 0.9661659413168608, "grad_norm": 0.49423572421073914, "learning_rate": 8.572169046302174e-06, "loss": 0.4811, "step": 2294 }, { "epoch": 0.9665871121718377, "grad_norm": 0.4572683572769165, "learning_rate": 8.57045369920047e-06, "loss": 0.4774, "step": 2295 }, { "epoch": 0.9670082830268145, "grad_norm": 0.4513395428657532, "learning_rate": 8.568737494187047e-06, "loss": 0.4818, "step": 2296 }, { "epoch": 0.9674294538817914, "grad_norm": 0.47851142287254333, "learning_rate": 8.56702043167428e-06, "loss": 0.5005, "step": 2297 }, { "epoch": 0.9678506247367682, "grad_norm": 0.4680686593055725, "learning_rate": 8.565302512074745e-06, "loss": 0.5102, "step": 2298 }, { "epoch": 0.968271795591745, "grad_norm": 0.4411342740058899, "learning_rate": 8.563583735801223e-06, "loss": 0.4848, "step": 2299 }, { "epoch": 0.9686929664467219, "grad_norm": 0.4693925976753235, "learning_rate": 8.561864103266708e-06, "loss": 0.4674, "step": 2300 }, { "epoch": 0.9691141373016987, "grad_norm": 0.4605425298213959, "learning_rate": 8.56014361488439e-06, "loss": 0.4928, "step": 2301 }, { "epoch": 0.9695353081566755, "grad_norm": 0.44557324051856995, "learning_rate": 8.558422271067671e-06, "loss": 0.4662, "step": 2302 }, { "epoch": 0.9699564790116524, "grad_norm": 0.43437278270721436, "learning_rate": 8.556700072230157e-06, "loss": 0.4767, "step": 2303 }, { "epoch": 0.9703776498666292, "grad_norm": 0.4885977804660797, "learning_rate": 8.554977018785662e-06, "loss": 0.5008, "step": 2304 }, { "epoch": 0.9707988207216061, "grad_norm": 0.42070063948631287, "learning_rate": 8.553253111148198e-06, "loss": 0.4367, "step": 2305 }, { "epoch": 0.9712199915765829, "grad_norm": 0.4401445984840393, "learning_rate": 8.551528349731992e-06, "loss": 0.4812, "step": 2306 }, { "epoch": 0.9716411624315597, "grad_norm": 0.4838387966156006, "learning_rate": 8.549802734951467e-06, "loss": 0.4883, "step": 2307 }, { "epoch": 0.9720623332865366, "grad_norm": 0.4359109103679657, "learning_rate": 8.548076267221258e-06, "loss": 0.464, "step": 2308 }, { "epoch": 0.9724835041415134, "grad_norm": 0.4554654061794281, "learning_rate": 8.546348946956199e-06, "loss": 0.4636, "step": 2309 }, { "epoch": 0.9729046749964902, "grad_norm": 0.4129808843135834, "learning_rate": 8.544620774571334e-06, "loss": 0.4721, "step": 2310 }, { "epoch": 0.9733258458514671, "grad_norm": 0.4383682906627655, "learning_rate": 8.542891750481913e-06, "loss": 0.4979, "step": 2311 }, { "epoch": 0.9737470167064439, "grad_norm": 0.4461154043674469, "learning_rate": 8.54116187510338e-06, "loss": 0.4757, "step": 2312 }, { "epoch": 0.9741681875614208, "grad_norm": 0.45120835304260254, "learning_rate": 8.539431148851398e-06, "loss": 0.4574, "step": 2313 }, { "epoch": 0.9745893584163976, "grad_norm": 0.4405539631843567, "learning_rate": 8.537699572141825e-06, "loss": 0.4677, "step": 2314 }, { "epoch": 0.9750105292713744, "grad_norm": 0.46832039952278137, "learning_rate": 8.535967145390724e-06, "loss": 0.427, "step": 2315 }, { "epoch": 0.9754317001263513, "grad_norm": 0.5210520625114441, "learning_rate": 8.534233869014366e-06, "loss": 0.4889, "step": 2316 }, { "epoch": 0.9758528709813281, "grad_norm": 0.4333098530769348, "learning_rate": 8.532499743429224e-06, "loss": 0.4565, "step": 2317 }, { "epoch": 0.976274041836305, "grad_norm": 0.3970520794391632, "learning_rate": 8.530764769051975e-06, "loss": 0.4615, "step": 2318 }, { "epoch": 0.9766952126912818, "grad_norm": 0.42737969756126404, "learning_rate": 8.529028946299499e-06, "loss": 0.4743, "step": 2319 }, { "epoch": 0.9771163835462586, "grad_norm": 0.44615837931632996, "learning_rate": 8.527292275588882e-06, "loss": 0.4852, "step": 2320 }, { "epoch": 0.9775375544012355, "grad_norm": 0.4351244270801544, "learning_rate": 8.525554757337412e-06, "loss": 0.4737, "step": 2321 }, { "epoch": 0.9779587252562123, "grad_norm": 0.43019571900367737, "learning_rate": 8.523816391962582e-06, "loss": 0.4717, "step": 2322 }, { "epoch": 0.9783798961111891, "grad_norm": 0.4285275638103485, "learning_rate": 8.52207717988209e-06, "loss": 0.4515, "step": 2323 }, { "epoch": 0.978801066966166, "grad_norm": 0.4212789535522461, "learning_rate": 8.520337121513832e-06, "loss": 0.4435, "step": 2324 }, { "epoch": 0.9792222378211428, "grad_norm": 0.43709704279899597, "learning_rate": 8.518596217275912e-06, "loss": 0.4568, "step": 2325 }, { "epoch": 0.9796434086761197, "grad_norm": 0.42350780963897705, "learning_rate": 8.516854467586635e-06, "loss": 0.4645, "step": 2326 }, { "epoch": 0.9800645795310965, "grad_norm": 0.4461718499660492, "learning_rate": 8.515111872864514e-06, "loss": 0.4941, "step": 2327 }, { "epoch": 0.9804857503860733, "grad_norm": 0.44799134135246277, "learning_rate": 8.513368433528255e-06, "loss": 0.4775, "step": 2328 }, { "epoch": 0.9809069212410502, "grad_norm": 0.436309814453125, "learning_rate": 8.51162414999678e-06, "loss": 0.4564, "step": 2329 }, { "epoch": 0.981328092096027, "grad_norm": 0.4363122284412384, "learning_rate": 8.509879022689201e-06, "loss": 0.4641, "step": 2330 }, { "epoch": 0.9817492629510038, "grad_norm": 0.42415210604667664, "learning_rate": 8.508133052024843e-06, "loss": 0.4932, "step": 2331 }, { "epoch": 0.9821704338059807, "grad_norm": 0.4456348717212677, "learning_rate": 8.506386238423226e-06, "loss": 0.4525, "step": 2332 }, { "epoch": 0.9825916046609574, "grad_norm": 0.37461477518081665, "learning_rate": 8.504638582304077e-06, "loss": 0.4591, "step": 2333 }, { "epoch": 0.9830127755159342, "grad_norm": 0.46923983097076416, "learning_rate": 8.502890084087324e-06, "loss": 0.495, "step": 2334 }, { "epoch": 0.9834339463709111, "grad_norm": 0.460021048784256, "learning_rate": 8.501140744193102e-06, "loss": 0.4728, "step": 2335 }, { "epoch": 0.9838551172258879, "grad_norm": 0.4567093849182129, "learning_rate": 8.499390563041738e-06, "loss": 0.4838, "step": 2336 }, { "epoch": 0.9842762880808648, "grad_norm": 0.441569447517395, "learning_rate": 8.497639541053769e-06, "loss": 0.4907, "step": 2337 }, { "epoch": 0.9846974589358416, "grad_norm": 0.44592103362083435, "learning_rate": 8.495887678649933e-06, "loss": 0.4835, "step": 2338 }, { "epoch": 0.9851186297908184, "grad_norm": 0.4420660734176636, "learning_rate": 8.49413497625117e-06, "loss": 0.4634, "step": 2339 }, { "epoch": 0.9855398006457953, "grad_norm": 0.4552556276321411, "learning_rate": 8.492381434278617e-06, "loss": 0.4778, "step": 2340 }, { "epoch": 0.9859609715007721, "grad_norm": 0.44114744663238525, "learning_rate": 8.49062705315362e-06, "loss": 0.4819, "step": 2341 }, { "epoch": 0.986382142355749, "grad_norm": 0.43052908778190613, "learning_rate": 8.488871833297722e-06, "loss": 0.4579, "step": 2342 }, { "epoch": 0.9868033132107258, "grad_norm": 0.4170188903808594, "learning_rate": 8.487115775132669e-06, "loss": 0.4394, "step": 2343 }, { "epoch": 0.9872244840657026, "grad_norm": 0.481594055891037, "learning_rate": 8.485358879080407e-06, "loss": 0.4692, "step": 2344 }, { "epoch": 0.9876456549206795, "grad_norm": 0.4547625482082367, "learning_rate": 8.483601145563087e-06, "loss": 0.4956, "step": 2345 }, { "epoch": 0.9880668257756563, "grad_norm": 0.4372372031211853, "learning_rate": 8.481842575003057e-06, "loss": 0.4794, "step": 2346 }, { "epoch": 0.9884879966306331, "grad_norm": 0.49479806423187256, "learning_rate": 8.480083167822867e-06, "loss": 0.5066, "step": 2347 }, { "epoch": 0.98890916748561, "grad_norm": 0.5049002766609192, "learning_rate": 8.47832292444527e-06, "loss": 0.4651, "step": 2348 }, { "epoch": 0.9893303383405868, "grad_norm": 0.47739896178245544, "learning_rate": 8.47656184529322e-06, "loss": 0.4793, "step": 2349 }, { "epoch": 0.9897515091955637, "grad_norm": 0.43240493535995483, "learning_rate": 8.47479993078987e-06, "loss": 0.4766, "step": 2350 }, { "epoch": 0.9901726800505405, "grad_norm": 0.47263163328170776, "learning_rate": 8.473037181358573e-06, "loss": 0.4977, "step": 2351 }, { "epoch": 0.9905938509055173, "grad_norm": 0.47774818539619446, "learning_rate": 8.471273597422886e-06, "loss": 0.4704, "step": 2352 }, { "epoch": 0.9910150217604942, "grad_norm": 0.47776755690574646, "learning_rate": 8.469509179406561e-06, "loss": 0.47, "step": 2353 }, { "epoch": 0.991436192615471, "grad_norm": 0.4691259562969208, "learning_rate": 8.467743927733559e-06, "loss": 0.4724, "step": 2354 }, { "epoch": 0.9918573634704478, "grad_norm": 0.4539393186569214, "learning_rate": 8.46597784282803e-06, "loss": 0.4902, "step": 2355 }, { "epoch": 0.9922785343254247, "grad_norm": 0.47940483689308167, "learning_rate": 8.464210925114335e-06, "loss": 0.4974, "step": 2356 }, { "epoch": 0.9926997051804015, "grad_norm": 0.525550365447998, "learning_rate": 8.462443175017032e-06, "loss": 0.5584, "step": 2357 }, { "epoch": 0.9931208760353784, "grad_norm": 0.44136765599250793, "learning_rate": 8.460674592960872e-06, "loss": 0.4725, "step": 2358 }, { "epoch": 0.9935420468903552, "grad_norm": 0.4223807156085968, "learning_rate": 8.458905179370816e-06, "loss": 0.4948, "step": 2359 }, { "epoch": 0.993963217745332, "grad_norm": 0.43894124031066895, "learning_rate": 8.457134934672018e-06, "loss": 0.5, "step": 2360 }, { "epoch": 0.9943843886003089, "grad_norm": 0.4917190968990326, "learning_rate": 8.455363859289833e-06, "loss": 0.4935, "step": 2361 }, { "epoch": 0.9948055594552857, "grad_norm": 0.47742730379104614, "learning_rate": 8.45359195364982e-06, "loss": 0.5056, "step": 2362 }, { "epoch": 0.9952267303102625, "grad_norm": 0.472075879573822, "learning_rate": 8.45181921817773e-06, "loss": 0.5027, "step": 2363 }, { "epoch": 0.9956479011652394, "grad_norm": 0.4466184377670288, "learning_rate": 8.450045653299521e-06, "loss": 0.4936, "step": 2364 }, { "epoch": 0.9960690720202162, "grad_norm": 0.49828192591667175, "learning_rate": 8.448271259441344e-06, "loss": 0.4867, "step": 2365 }, { "epoch": 0.996490242875193, "grad_norm": 0.5410318970680237, "learning_rate": 8.446496037029555e-06, "loss": 0.4989, "step": 2366 }, { "epoch": 0.9969114137301699, "grad_norm": 0.49047863483428955, "learning_rate": 8.4447199864907e-06, "loss": 0.493, "step": 2367 }, { "epoch": 0.9973325845851467, "grad_norm": 0.4736546576023102, "learning_rate": 8.442943108251538e-06, "loss": 0.5056, "step": 2368 }, { "epoch": 0.9977537554401236, "grad_norm": 0.4608830213546753, "learning_rate": 8.441165402739014e-06, "loss": 0.4736, "step": 2369 }, { "epoch": 0.9981749262951004, "grad_norm": 0.5045085549354553, "learning_rate": 8.439386870380278e-06, "loss": 0.4834, "step": 2370 }, { "epoch": 0.9985960971500772, "grad_norm": 0.4055655896663666, "learning_rate": 8.437607511602675e-06, "loss": 0.4677, "step": 2371 }, { "epoch": 0.9990172680050541, "grad_norm": 0.4109354019165039, "learning_rate": 8.435827326833755e-06, "loss": 0.4685, "step": 2372 }, { "epoch": 0.9994384388600309, "grad_norm": 0.40643683075904846, "learning_rate": 8.434046316501257e-06, "loss": 0.4651, "step": 2373 }, { "epoch": 0.9998596097150078, "grad_norm": 0.4523998498916626, "learning_rate": 8.432264481033128e-06, "loss": 0.4852, "step": 2374 }, { "epoch": 1.0002807805699845, "grad_norm": 0.8392765522003174, "learning_rate": 8.430481820857508e-06, "loss": 0.7142, "step": 2375 }, { "epoch": 1.0007019514249613, "grad_norm": 0.4421597123146057, "learning_rate": 8.428698336402736e-06, "loss": 0.4558, "step": 2376 }, { "epoch": 1.0011231222799382, "grad_norm": 0.4007064700126648, "learning_rate": 8.426914028097349e-06, "loss": 0.3894, "step": 2377 }, { "epoch": 1.001544293134915, "grad_norm": 0.6129955053329468, "learning_rate": 8.42512889637008e-06, "loss": 0.5396, "step": 2378 }, { "epoch": 1.0019654639898918, "grad_norm": 0.40124499797821045, "learning_rate": 8.423342941649866e-06, "loss": 0.3922, "step": 2379 }, { "epoch": 1.0023866348448687, "grad_norm": 0.49290627241134644, "learning_rate": 8.421556164365836e-06, "loss": 0.4291, "step": 2380 }, { "epoch": 1.0028078056998455, "grad_norm": 0.5747617483139038, "learning_rate": 8.41976856494732e-06, "loss": 0.529, "step": 2381 }, { "epoch": 1.0032289765548223, "grad_norm": 0.4603651463985443, "learning_rate": 8.417980143823838e-06, "loss": 0.4647, "step": 2382 }, { "epoch": 1.0036501474097992, "grad_norm": 0.45297688245773315, "learning_rate": 8.41619090142512e-06, "loss": 0.4001, "step": 2383 }, { "epoch": 1.004071318264776, "grad_norm": 0.5306430459022522, "learning_rate": 8.414400838181086e-06, "loss": 0.4546, "step": 2384 }, { "epoch": 1.0044924891197529, "grad_norm": 0.4797383248806, "learning_rate": 8.41260995452185e-06, "loss": 0.4796, "step": 2385 }, { "epoch": 1.0049136599747297, "grad_norm": 0.41780975461006165, "learning_rate": 8.410818250877729e-06, "loss": 0.3724, "step": 2386 }, { "epoch": 1.0053348308297065, "grad_norm": 0.47834014892578125, "learning_rate": 8.409025727679236e-06, "loss": 0.4414, "step": 2387 }, { "epoch": 1.0057560016846834, "grad_norm": 0.4402569532394409, "learning_rate": 8.407232385357081e-06, "loss": 0.4193, "step": 2388 }, { "epoch": 1.0061771725396602, "grad_norm": 0.5064718127250671, "learning_rate": 8.405438224342166e-06, "loss": 0.5223, "step": 2389 }, { "epoch": 1.006598343394637, "grad_norm": 0.39356064796447754, "learning_rate": 8.403643245065598e-06, "loss": 0.3692, "step": 2390 }, { "epoch": 1.007019514249614, "grad_norm": 0.43134018778800964, "learning_rate": 8.401847447958673e-06, "loss": 0.4432, "step": 2391 }, { "epoch": 1.0074406851045907, "grad_norm": 0.4634217619895935, "learning_rate": 8.400050833452889e-06, "loss": 0.452, "step": 2392 }, { "epoch": 1.0078618559595676, "grad_norm": 0.45665934681892395, "learning_rate": 8.398253401979935e-06, "loss": 0.4558, "step": 2393 }, { "epoch": 1.0082830268145444, "grad_norm": 0.4766361713409424, "learning_rate": 8.396455153971703e-06, "loss": 0.4618, "step": 2394 }, { "epoch": 1.0087041976695212, "grad_norm": 0.4478599429130554, "learning_rate": 8.394656089860274e-06, "loss": 0.4693, "step": 2395 }, { "epoch": 1.009125368524498, "grad_norm": 0.4468688666820526, "learning_rate": 8.392856210077932e-06, "loss": 0.3994, "step": 2396 }, { "epoch": 1.009546539379475, "grad_norm": 0.4902365505695343, "learning_rate": 8.391055515057152e-06, "loss": 0.5153, "step": 2397 }, { "epoch": 1.0099677102344518, "grad_norm": 0.40513381361961365, "learning_rate": 8.389254005230607e-06, "loss": 0.3622, "step": 2398 }, { "epoch": 1.0103888810894286, "grad_norm": 0.5200955271720886, "learning_rate": 8.387451681031164e-06, "loss": 0.4981, "step": 2399 }, { "epoch": 1.0108100519444054, "grad_norm": 0.4852186441421509, "learning_rate": 8.38564854289189e-06, "loss": 0.4699, "step": 2400 }, { "epoch": 1.0112312227993823, "grad_norm": 0.480666846036911, "learning_rate": 8.383844591246042e-06, "loss": 0.4754, "step": 2401 }, { "epoch": 1.011652393654359, "grad_norm": 0.42481476068496704, "learning_rate": 8.382039826527075e-06, "loss": 0.4432, "step": 2402 }, { "epoch": 1.012073564509336, "grad_norm": 0.45642784237861633, "learning_rate": 8.380234249168642e-06, "loss": 0.4248, "step": 2403 }, { "epoch": 1.0124947353643128, "grad_norm": 0.43380841612815857, "learning_rate": 8.378427859604585e-06, "loss": 0.3758, "step": 2404 }, { "epoch": 1.0129159062192896, "grad_norm": 0.5064688324928284, "learning_rate": 8.376620658268948e-06, "loss": 0.4891, "step": 2405 }, { "epoch": 1.0133370770742665, "grad_norm": 0.4539426565170288, "learning_rate": 8.374812645595967e-06, "loss": 0.4135, "step": 2406 }, { "epoch": 1.0137582479292433, "grad_norm": 0.4870453476905823, "learning_rate": 8.373003822020069e-06, "loss": 0.4743, "step": 2407 }, { "epoch": 1.0141794187842201, "grad_norm": 0.5058608651161194, "learning_rate": 8.371194187975883e-06, "loss": 0.4825, "step": 2408 }, { "epoch": 1.014600589639197, "grad_norm": 0.4747130870819092, "learning_rate": 8.36938374389823e-06, "loss": 0.4194, "step": 2409 }, { "epoch": 1.0150217604941738, "grad_norm": 0.4790617525577545, "learning_rate": 8.367572490222122e-06, "loss": 0.4631, "step": 2410 }, { "epoch": 1.0154429313491506, "grad_norm": 0.4788684546947479, "learning_rate": 8.365760427382772e-06, "loss": 0.4248, "step": 2411 }, { "epoch": 1.0158641022041275, "grad_norm": 0.5151650309562683, "learning_rate": 8.363947555815581e-06, "loss": 0.4833, "step": 2412 }, { "epoch": 1.0162852730591043, "grad_norm": 0.4532051086425781, "learning_rate": 8.36213387595615e-06, "loss": 0.4451, "step": 2413 }, { "epoch": 1.0167064439140812, "grad_norm": 0.45485720038414, "learning_rate": 8.36031938824027e-06, "loss": 0.4183, "step": 2414 }, { "epoch": 1.017127614769058, "grad_norm": 0.4704437255859375, "learning_rate": 8.35850409310393e-06, "loss": 0.4343, "step": 2415 }, { "epoch": 1.0175487856240348, "grad_norm": 0.4898248016834259, "learning_rate": 8.356687990983305e-06, "loss": 0.4467, "step": 2416 }, { "epoch": 1.0179699564790117, "grad_norm": 0.4608704149723053, "learning_rate": 8.354871082314776e-06, "loss": 0.4548, "step": 2417 }, { "epoch": 1.0183911273339885, "grad_norm": 0.5044648051261902, "learning_rate": 8.353053367534909e-06, "loss": 0.519, "step": 2418 }, { "epoch": 1.0188122981889653, "grad_norm": 0.41240033507347107, "learning_rate": 8.351234847080464e-06, "loss": 0.3699, "step": 2419 }, { "epoch": 1.0192334690439422, "grad_norm": 0.436786949634552, "learning_rate": 8.3494155213884e-06, "loss": 0.4409, "step": 2420 }, { "epoch": 1.019654639898919, "grad_norm": 0.4382971525192261, "learning_rate": 8.347595390895863e-06, "loss": 0.4256, "step": 2421 }, { "epoch": 1.0200758107538959, "grad_norm": 0.45996999740600586, "learning_rate": 8.345774456040199e-06, "loss": 0.4553, "step": 2422 }, { "epoch": 1.0204969816088727, "grad_norm": 0.4462526738643646, "learning_rate": 8.34395271725894e-06, "loss": 0.453, "step": 2423 }, { "epoch": 1.0209181524638495, "grad_norm": 0.5076442956924438, "learning_rate": 8.342130174989819e-06, "loss": 0.5182, "step": 2424 }, { "epoch": 1.0213393233188264, "grad_norm": 0.44894710183143616, "learning_rate": 8.340306829670754e-06, "loss": 0.4468, "step": 2425 }, { "epoch": 1.0217604941738032, "grad_norm": 0.43831223249435425, "learning_rate": 8.338482681739864e-06, "loss": 0.4476, "step": 2426 }, { "epoch": 1.02218166502878, "grad_norm": 0.4310161769390106, "learning_rate": 8.336657731635454e-06, "loss": 0.4163, "step": 2427 }, { "epoch": 1.0226028358837569, "grad_norm": 0.4357665479183197, "learning_rate": 8.334831979796026e-06, "loss": 0.4381, "step": 2428 }, { "epoch": 1.0230240067387337, "grad_norm": 0.4599623382091522, "learning_rate": 8.333005426660272e-06, "loss": 0.4313, "step": 2429 }, { "epoch": 1.0234451775937106, "grad_norm": 0.450198769569397, "learning_rate": 8.331178072667079e-06, "loss": 0.4447, "step": 2430 }, { "epoch": 1.0238663484486874, "grad_norm": 0.4759467542171478, "learning_rate": 8.329349918255525e-06, "loss": 0.5093, "step": 2431 }, { "epoch": 1.0242875193036642, "grad_norm": 0.42786040902137756, "learning_rate": 8.327520963864878e-06, "loss": 0.3989, "step": 2432 }, { "epoch": 1.024708690158641, "grad_norm": 0.4251486659049988, "learning_rate": 8.325691209934605e-06, "loss": 0.4589, "step": 2433 }, { "epoch": 1.025129861013618, "grad_norm": 0.417835533618927, "learning_rate": 8.32386065690436e-06, "loss": 0.4606, "step": 2434 }, { "epoch": 1.0255510318685948, "grad_norm": 0.4303213357925415, "learning_rate": 8.322029305213986e-06, "loss": 0.437, "step": 2435 }, { "epoch": 1.0259722027235716, "grad_norm": 0.5072858333587646, "learning_rate": 8.320197155303527e-06, "loss": 0.5502, "step": 2436 }, { "epoch": 1.0263933735785484, "grad_norm": 0.49158161878585815, "learning_rate": 8.318364207613208e-06, "loss": 0.4856, "step": 2437 }, { "epoch": 1.0268145444335253, "grad_norm": 0.43192780017852783, "learning_rate": 8.316530462583457e-06, "loss": 0.4165, "step": 2438 }, { "epoch": 1.027235715288502, "grad_norm": 0.4230997562408447, "learning_rate": 8.314695920654884e-06, "loss": 0.4173, "step": 2439 }, { "epoch": 1.027656886143479, "grad_norm": 0.41682592034339905, "learning_rate": 8.312860582268295e-06, "loss": 0.4553, "step": 2440 }, { "epoch": 1.0280780569984558, "grad_norm": 0.45025551319122314, "learning_rate": 8.311024447864688e-06, "loss": 0.403, "step": 2441 }, { "epoch": 1.0284992278534326, "grad_norm": 0.4858638644218445, "learning_rate": 8.30918751788525e-06, "loss": 0.4953, "step": 2442 }, { "epoch": 1.0289203987084095, "grad_norm": 0.47475019097328186, "learning_rate": 8.307349792771361e-06, "loss": 0.4194, "step": 2443 }, { "epoch": 1.0293415695633863, "grad_norm": 0.4682968854904175, "learning_rate": 8.305511272964589e-06, "loss": 0.4818, "step": 2444 }, { "epoch": 1.0297627404183631, "grad_norm": 0.5565170645713806, "learning_rate": 8.303671958906698e-06, "loss": 0.4894, "step": 2445 }, { "epoch": 1.03018391127334, "grad_norm": 0.42361611127853394, "learning_rate": 8.301831851039637e-06, "loss": 0.4012, "step": 2446 }, { "epoch": 1.0306050821283168, "grad_norm": 0.4238909184932709, "learning_rate": 8.299990949805551e-06, "loss": 0.3946, "step": 2447 }, { "epoch": 1.0310262529832936, "grad_norm": 0.5862597227096558, "learning_rate": 8.298149255646775e-06, "loss": 0.4982, "step": 2448 }, { "epoch": 1.0314474238382705, "grad_norm": 0.495968759059906, "learning_rate": 8.296306769005828e-06, "loss": 0.4611, "step": 2449 }, { "epoch": 1.0318685946932473, "grad_norm": 0.42187559604644775, "learning_rate": 8.294463490325427e-06, "loss": 0.4133, "step": 2450 }, { "epoch": 1.0322897655482242, "grad_norm": 0.42713260650634766, "learning_rate": 8.292619420048478e-06, "loss": 0.4003, "step": 2451 }, { "epoch": 1.032710936403201, "grad_norm": 0.4586465358734131, "learning_rate": 8.29077455861807e-06, "loss": 0.4258, "step": 2452 }, { "epoch": 1.0331321072581778, "grad_norm": 0.4448077082633972, "learning_rate": 8.288928906477497e-06, "loss": 0.4378, "step": 2453 }, { "epoch": 1.0335532781131547, "grad_norm": 0.4078882336616516, "learning_rate": 8.287082464070226e-06, "loss": 0.4003, "step": 2454 }, { "epoch": 1.0339744489681315, "grad_norm": 0.4274666905403137, "learning_rate": 8.285235231839928e-06, "loss": 0.4656, "step": 2455 }, { "epoch": 1.0343956198231083, "grad_norm": 0.48873332142829895, "learning_rate": 8.28338721023045e-06, "loss": 0.4496, "step": 2456 }, { "epoch": 1.034816790678085, "grad_norm": 0.4370409846305847, "learning_rate": 8.281538399685845e-06, "loss": 0.4287, "step": 2457 }, { "epoch": 1.035237961533062, "grad_norm": 0.4910830855369568, "learning_rate": 8.279688800650341e-06, "loss": 0.4985, "step": 2458 }, { "epoch": 1.0356591323880386, "grad_norm": 0.42900773882865906, "learning_rate": 8.277838413568363e-06, "loss": 0.4458, "step": 2459 }, { "epoch": 1.0360803032430155, "grad_norm": 0.4460070729255676, "learning_rate": 8.275987238884519e-06, "loss": 0.4961, "step": 2460 }, { "epoch": 1.0365014740979923, "grad_norm": 0.4684588313102722, "learning_rate": 8.27413527704362e-06, "loss": 0.3969, "step": 2461 }, { "epoch": 1.0369226449529692, "grad_norm": 0.42434409260749817, "learning_rate": 8.272282528490652e-06, "loss": 0.4575, "step": 2462 }, { "epoch": 1.037343815807946, "grad_norm": 0.4132627546787262, "learning_rate": 8.270428993670794e-06, "loss": 0.4392, "step": 2463 }, { "epoch": 1.0377649866629228, "grad_norm": 0.47391989827156067, "learning_rate": 8.268574673029415e-06, "loss": 0.5141, "step": 2464 }, { "epoch": 1.0381861575178997, "grad_norm": 0.39948514103889465, "learning_rate": 8.266719567012076e-06, "loss": 0.3757, "step": 2465 }, { "epoch": 1.0386073283728765, "grad_norm": 0.4497124254703522, "learning_rate": 8.26486367606452e-06, "loss": 0.4513, "step": 2466 }, { "epoch": 1.0390284992278533, "grad_norm": 0.477783203125, "learning_rate": 8.263007000632684e-06, "loss": 0.4314, "step": 2467 }, { "epoch": 1.0394496700828302, "grad_norm": 0.4381953775882721, "learning_rate": 8.261149541162693e-06, "loss": 0.4493, "step": 2468 }, { "epoch": 1.039870840937807, "grad_norm": 0.4916861951351166, "learning_rate": 8.259291298100855e-06, "loss": 0.4789, "step": 2469 }, { "epoch": 1.0402920117927839, "grad_norm": 0.5215486288070679, "learning_rate": 8.25743227189367e-06, "loss": 0.4272, "step": 2470 }, { "epoch": 1.0407131826477607, "grad_norm": 0.44505664706230164, "learning_rate": 8.255572462987832e-06, "loss": 0.4116, "step": 2471 }, { "epoch": 1.0411343535027375, "grad_norm": 0.42384132742881775, "learning_rate": 8.253711871830215e-06, "loss": 0.4577, "step": 2472 }, { "epoch": 1.0415555243577144, "grad_norm": 0.5435899496078491, "learning_rate": 8.251850498867882e-06, "loss": 0.4964, "step": 2473 }, { "epoch": 1.0419766952126912, "grad_norm": 0.44404682517051697, "learning_rate": 8.249988344548084e-06, "loss": 0.4316, "step": 2474 }, { "epoch": 1.042397866067668, "grad_norm": 0.4435637295246124, "learning_rate": 8.248125409318265e-06, "loss": 0.4887, "step": 2475 }, { "epoch": 1.0428190369226449, "grad_norm": 0.4406539499759674, "learning_rate": 8.24626169362605e-06, "loss": 0.4651, "step": 2476 }, { "epoch": 1.0432402077776217, "grad_norm": 0.49207815527915955, "learning_rate": 8.244397197919255e-06, "loss": 0.4834, "step": 2477 }, { "epoch": 1.0436613786325986, "grad_norm": 0.46872860193252563, "learning_rate": 8.242531922645884e-06, "loss": 0.4705, "step": 2478 }, { "epoch": 1.0440825494875754, "grad_norm": 0.431892067193985, "learning_rate": 8.240665868254124e-06, "loss": 0.3869, "step": 2479 }, { "epoch": 1.0445037203425522, "grad_norm": 0.4591068625450134, "learning_rate": 8.238799035192356e-06, "loss": 0.4528, "step": 2480 }, { "epoch": 1.044924891197529, "grad_norm": 0.4569295644760132, "learning_rate": 8.23693142390914e-06, "loss": 0.4845, "step": 2481 }, { "epoch": 1.045346062052506, "grad_norm": 0.4105842113494873, "learning_rate": 8.235063034853228e-06, "loss": 0.3883, "step": 2482 }, { "epoch": 1.0457672329074827, "grad_norm": 0.37778159976005554, "learning_rate": 8.23319386847356e-06, "loss": 0.3921, "step": 2483 }, { "epoch": 1.0461884037624596, "grad_norm": 0.48972535133361816, "learning_rate": 8.231323925219264e-06, "loss": 0.5331, "step": 2484 }, { "epoch": 1.0466095746174364, "grad_norm": 0.43257957696914673, "learning_rate": 8.229453205539646e-06, "loss": 0.4188, "step": 2485 }, { "epoch": 1.0470307454724133, "grad_norm": 0.4322459399700165, "learning_rate": 8.227581709884205e-06, "loss": 0.3968, "step": 2486 }, { "epoch": 1.04745191632739, "grad_norm": 0.5019908547401428, "learning_rate": 8.225709438702627e-06, "loss": 0.4268, "step": 2487 }, { "epoch": 1.047873087182367, "grad_norm": 0.46131300926208496, "learning_rate": 8.223836392444784e-06, "loss": 0.4892, "step": 2488 }, { "epoch": 1.0482942580373438, "grad_norm": 0.4896682798862457, "learning_rate": 8.22196257156073e-06, "loss": 0.4806, "step": 2489 }, { "epoch": 1.0487154288923206, "grad_norm": 0.5026336908340454, "learning_rate": 8.220087976500712e-06, "loss": 0.4613, "step": 2490 }, { "epoch": 1.0491365997472974, "grad_norm": 0.41662633419036865, "learning_rate": 8.218212607715157e-06, "loss": 0.4023, "step": 2491 }, { "epoch": 1.0495577706022743, "grad_norm": 0.4269006550312042, "learning_rate": 8.21633646565468e-06, "loss": 0.4502, "step": 2492 }, { "epoch": 1.0499789414572511, "grad_norm": 0.40429604053497314, "learning_rate": 8.214459550770084e-06, "loss": 0.3926, "step": 2493 }, { "epoch": 1.050400112312228, "grad_norm": 0.47313469648361206, "learning_rate": 8.212581863512354e-06, "loss": 0.4624, "step": 2494 }, { "epoch": 1.0508212831672048, "grad_norm": 0.5020278692245483, "learning_rate": 8.210703404332662e-06, "loss": 0.5373, "step": 2495 }, { "epoch": 1.0512424540221816, "grad_norm": 0.45863577723503113, "learning_rate": 8.208824173682368e-06, "loss": 0.4619, "step": 2496 }, { "epoch": 1.0516636248771585, "grad_norm": 0.4465801417827606, "learning_rate": 8.206944172013013e-06, "loss": 0.465, "step": 2497 }, { "epoch": 1.0520847957321353, "grad_norm": 0.49859869480133057, "learning_rate": 8.205063399776326e-06, "loss": 0.4759, "step": 2498 }, { "epoch": 1.0525059665871122, "grad_norm": 0.4272654056549072, "learning_rate": 8.20318185742422e-06, "loss": 0.4317, "step": 2499 }, { "epoch": 1.052927137442089, "grad_norm": 0.43140795826911926, "learning_rate": 8.201299545408799e-06, "loss": 0.4323, "step": 2500 }, { "epoch": 1.0533483082970658, "grad_norm": 0.3922896385192871, "learning_rate": 8.199416464182338e-06, "loss": 0.4532, "step": 2501 }, { "epoch": 1.0537694791520427, "grad_norm": 0.5118197798728943, "learning_rate": 8.197532614197309e-06, "loss": 0.5042, "step": 2502 }, { "epoch": 1.0541906500070195, "grad_norm": 0.48830464482307434, "learning_rate": 8.195647995906367e-06, "loss": 0.5099, "step": 2503 }, { "epoch": 1.0546118208619963, "grad_norm": 0.4176122546195984, "learning_rate": 8.193762609762348e-06, "loss": 0.3746, "step": 2504 }, { "epoch": 1.0550329917169732, "grad_norm": 0.5323330760002136, "learning_rate": 8.191876456218276e-06, "loss": 0.5091, "step": 2505 }, { "epoch": 1.05545416257195, "grad_norm": 0.43391135334968567, "learning_rate": 8.189989535727354e-06, "loss": 0.4297, "step": 2506 }, { "epoch": 1.0558753334269269, "grad_norm": 0.4483187198638916, "learning_rate": 8.188101848742976e-06, "loss": 0.4644, "step": 2507 }, { "epoch": 1.0562965042819037, "grad_norm": 0.41242122650146484, "learning_rate": 8.186213395718714e-06, "loss": 0.3969, "step": 2508 }, { "epoch": 1.0567176751368805, "grad_norm": 0.4458988606929779, "learning_rate": 8.184324177108331e-06, "loss": 0.4258, "step": 2509 }, { "epoch": 1.0571388459918574, "grad_norm": 0.46922600269317627, "learning_rate": 8.18243419336577e-06, "loss": 0.4769, "step": 2510 }, { "epoch": 1.0575600168468342, "grad_norm": 0.4627678692340851, "learning_rate": 8.180543444945154e-06, "loss": 0.4352, "step": 2511 }, { "epoch": 1.057981187701811, "grad_norm": 0.44792240858078003, "learning_rate": 8.178651932300796e-06, "loss": 0.4592, "step": 2512 }, { "epoch": 1.0584023585567879, "grad_norm": 0.39513257145881653, "learning_rate": 8.176759655887192e-06, "loss": 0.3931, "step": 2513 }, { "epoch": 1.0588235294117647, "grad_norm": 0.43155035376548767, "learning_rate": 8.174866616159015e-06, "loss": 0.4361, "step": 2514 }, { "epoch": 1.0592447002667416, "grad_norm": 0.4987376630306244, "learning_rate": 8.172972813571132e-06, "loss": 0.4862, "step": 2515 }, { "epoch": 1.0596658711217184, "grad_norm": 0.41642311215400696, "learning_rate": 8.171078248578583e-06, "loss": 0.4568, "step": 2516 }, { "epoch": 1.0600870419766952, "grad_norm": 0.46569326519966125, "learning_rate": 8.169182921636598e-06, "loss": 0.4845, "step": 2517 }, { "epoch": 1.060508212831672, "grad_norm": 0.46525877714157104, "learning_rate": 8.167286833200586e-06, "loss": 0.4631, "step": 2518 }, { "epoch": 1.060929383686649, "grad_norm": 0.43016892671585083, "learning_rate": 8.165389983726145e-06, "loss": 0.4116, "step": 2519 }, { "epoch": 1.0613505545416257, "grad_norm": 0.46083903312683105, "learning_rate": 8.16349237366905e-06, "loss": 0.4677, "step": 2520 }, { "epoch": 1.0617717253966026, "grad_norm": 0.45700517296791077, "learning_rate": 8.161594003485254e-06, "loss": 0.4287, "step": 2521 }, { "epoch": 1.0621928962515794, "grad_norm": 0.40911388397216797, "learning_rate": 8.159694873630906e-06, "loss": 0.4133, "step": 2522 }, { "epoch": 1.0626140671065563, "grad_norm": 0.48746559023857117, "learning_rate": 8.157794984562331e-06, "loss": 0.4784, "step": 2523 }, { "epoch": 1.063035237961533, "grad_norm": 0.5368577241897583, "learning_rate": 8.15589433673603e-06, "loss": 0.4565, "step": 2524 }, { "epoch": 1.06345640881651, "grad_norm": 0.4242725670337677, "learning_rate": 8.153992930608699e-06, "loss": 0.4437, "step": 2525 }, { "epoch": 1.0638775796714868, "grad_norm": 0.43969234824180603, "learning_rate": 8.152090766637207e-06, "loss": 0.447, "step": 2526 }, { "epoch": 1.0642987505264636, "grad_norm": 0.43875008821487427, "learning_rate": 8.150187845278605e-06, "loss": 0.4413, "step": 2527 }, { "epoch": 1.0647199213814404, "grad_norm": 0.46602797508239746, "learning_rate": 8.14828416699013e-06, "loss": 0.4712, "step": 2528 }, { "epoch": 1.0651410922364173, "grad_norm": 0.3922511041164398, "learning_rate": 8.146379732229201e-06, "loss": 0.4268, "step": 2529 }, { "epoch": 1.0655622630913941, "grad_norm": 0.4642902612686157, "learning_rate": 8.144474541453418e-06, "loss": 0.4514, "step": 2530 }, { "epoch": 1.065983433946371, "grad_norm": 0.41600295901298523, "learning_rate": 8.142568595120561e-06, "loss": 0.4112, "step": 2531 }, { "epoch": 1.0664046048013478, "grad_norm": 0.4604429006576538, "learning_rate": 8.14066189368859e-06, "loss": 0.4374, "step": 2532 }, { "epoch": 1.0668257756563246, "grad_norm": 0.5329805612564087, "learning_rate": 8.138754437615652e-06, "loss": 0.4817, "step": 2533 }, { "epoch": 1.0672469465113015, "grad_norm": 0.396685391664505, "learning_rate": 8.13684622736007e-06, "loss": 0.4297, "step": 2534 }, { "epoch": 1.0676681173662783, "grad_norm": 0.4553171992301941, "learning_rate": 8.134937263380352e-06, "loss": 0.4785, "step": 2535 }, { "epoch": 1.0680892882212552, "grad_norm": 0.4537266194820404, "learning_rate": 8.133027546135183e-06, "loss": 0.4334, "step": 2536 }, { "epoch": 1.068510459076232, "grad_norm": 0.4562256634235382, "learning_rate": 8.131117076083434e-06, "loss": 0.4395, "step": 2537 }, { "epoch": 1.0689316299312088, "grad_norm": 0.42292022705078125, "learning_rate": 8.129205853684155e-06, "loss": 0.4101, "step": 2538 }, { "epoch": 1.0693528007861857, "grad_norm": 0.44172048568725586, "learning_rate": 8.127293879396574e-06, "loss": 0.4471, "step": 2539 }, { "epoch": 1.0697739716411625, "grad_norm": 0.4629998207092285, "learning_rate": 8.125381153680103e-06, "loss": 0.469, "step": 2540 }, { "epoch": 1.0701951424961393, "grad_norm": 0.4463474452495575, "learning_rate": 8.123467676994331e-06, "loss": 0.4152, "step": 2541 }, { "epoch": 1.0706163133511162, "grad_norm": 0.4710005521774292, "learning_rate": 8.121553449799035e-06, "loss": 0.4781, "step": 2542 }, { "epoch": 1.071037484206093, "grad_norm": 0.38669487833976746, "learning_rate": 8.119638472554164e-06, "loss": 0.4041, "step": 2543 }, { "epoch": 1.0714586550610699, "grad_norm": 0.44903233647346497, "learning_rate": 8.11772274571985e-06, "loss": 0.4323, "step": 2544 }, { "epoch": 1.0718798259160467, "grad_norm": 0.44316622614860535, "learning_rate": 8.115806269756405e-06, "loss": 0.4712, "step": 2545 }, { "epoch": 1.0723009967710235, "grad_norm": 0.4563806354999542, "learning_rate": 8.113889045124325e-06, "loss": 0.5212, "step": 2546 }, { "epoch": 1.0727221676260004, "grad_norm": 0.4083203375339508, "learning_rate": 8.11197107228428e-06, "loss": 0.4315, "step": 2547 }, { "epoch": 1.0731433384809772, "grad_norm": 0.47807595133781433, "learning_rate": 8.110052351697118e-06, "loss": 0.4857, "step": 2548 }, { "epoch": 1.073564509335954, "grad_norm": 0.4600072503089905, "learning_rate": 8.108132883823878e-06, "loss": 0.4619, "step": 2549 }, { "epoch": 1.0739856801909309, "grad_norm": 0.42435523867607117, "learning_rate": 8.106212669125769e-06, "loss": 0.3957, "step": 2550 }, { "epoch": 1.0744068510459077, "grad_norm": 0.3926413953304291, "learning_rate": 8.10429170806418e-06, "loss": 0.3681, "step": 2551 }, { "epoch": 1.0748280219008846, "grad_norm": 0.4689202606678009, "learning_rate": 8.102370001100684e-06, "loss": 0.4778, "step": 2552 }, { "epoch": 1.0752491927558614, "grad_norm": 0.4303743243217468, "learning_rate": 8.100447548697028e-06, "loss": 0.438, "step": 2553 }, { "epoch": 1.0756703636108382, "grad_norm": 0.4838293194770813, "learning_rate": 8.09852435131514e-06, "loss": 0.4927, "step": 2554 }, { "epoch": 1.076091534465815, "grad_norm": 0.5130427479743958, "learning_rate": 8.09660040941713e-06, "loss": 0.4405, "step": 2555 }, { "epoch": 1.0765127053207917, "grad_norm": 0.43087971210479736, "learning_rate": 8.094675723465284e-06, "loss": 0.4944, "step": 2556 }, { "epoch": 1.0769338761757687, "grad_norm": 0.4915970265865326, "learning_rate": 8.092750293922067e-06, "loss": 0.482, "step": 2557 }, { "epoch": 1.0773550470307454, "grad_norm": 0.43981871008872986, "learning_rate": 8.090824121250123e-06, "loss": 0.4021, "step": 2558 }, { "epoch": 1.0777762178857224, "grad_norm": 0.4638407528400421, "learning_rate": 8.088897205912271e-06, "loss": 0.4605, "step": 2559 }, { "epoch": 1.078197388740699, "grad_norm": 0.43002498149871826, "learning_rate": 8.086969548371518e-06, "loss": 0.4479, "step": 2560 }, { "epoch": 1.078618559595676, "grad_norm": 0.43868058919906616, "learning_rate": 8.085041149091038e-06, "loss": 0.4173, "step": 2561 }, { "epoch": 1.0790397304506527, "grad_norm": 0.45059338212013245, "learning_rate": 8.083112008534192e-06, "loss": 0.4896, "step": 2562 }, { "epoch": 1.0794609013056295, "grad_norm": 0.4933508336544037, "learning_rate": 8.081182127164513e-06, "loss": 0.4819, "step": 2563 }, { "epoch": 1.0798820721606064, "grad_norm": 0.4699150323867798, "learning_rate": 8.079251505445715e-06, "loss": 0.4697, "step": 2564 }, { "epoch": 1.0803032430155832, "grad_norm": 0.47207507491111755, "learning_rate": 8.077320143841691e-06, "loss": 0.4418, "step": 2565 }, { "epoch": 1.08072441387056, "grad_norm": 0.42964228987693787, "learning_rate": 8.07538804281651e-06, "loss": 0.3795, "step": 2566 }, { "epoch": 1.081145584725537, "grad_norm": 0.4890831708908081, "learning_rate": 8.073455202834418e-06, "loss": 0.462, "step": 2567 }, { "epoch": 1.0815667555805137, "grad_norm": 0.43240615725517273, "learning_rate": 8.07152162435984e-06, "loss": 0.4171, "step": 2568 }, { "epoch": 1.0819879264354906, "grad_norm": 0.4043336510658264, "learning_rate": 8.069587307857377e-06, "loss": 0.416, "step": 2569 }, { "epoch": 1.0824090972904674, "grad_norm": 0.4411877989768982, "learning_rate": 8.067652253791809e-06, "loss": 0.4498, "step": 2570 }, { "epoch": 1.0828302681454443, "grad_norm": 0.4813975691795349, "learning_rate": 8.065716462628093e-06, "loss": 0.481, "step": 2571 }, { "epoch": 1.083251439000421, "grad_norm": 0.37203213572502136, "learning_rate": 8.063779934831361e-06, "loss": 0.3899, "step": 2572 }, { "epoch": 1.083672609855398, "grad_norm": 0.449577271938324, "learning_rate": 8.061842670866924e-06, "loss": 0.4797, "step": 2573 }, { "epoch": 1.0840937807103748, "grad_norm": 0.49253344535827637, "learning_rate": 8.059904671200271e-06, "loss": 0.4738, "step": 2574 }, { "epoch": 1.0845149515653516, "grad_norm": 0.4350753724575043, "learning_rate": 8.057965936297066e-06, "loss": 0.4477, "step": 2575 }, { "epoch": 1.0849361224203284, "grad_norm": 0.5043025016784668, "learning_rate": 8.056026466623147e-06, "loss": 0.471, "step": 2576 }, { "epoch": 1.0853572932753053, "grad_norm": 0.4186974763870239, "learning_rate": 8.054086262644537e-06, "loss": 0.4403, "step": 2577 }, { "epoch": 1.0857784641302821, "grad_norm": 0.39984962344169617, "learning_rate": 8.052145324827423e-06, "loss": 0.3701, "step": 2578 }, { "epoch": 1.086199634985259, "grad_norm": 0.4742709994316101, "learning_rate": 8.050203653638183e-06, "loss": 0.4837, "step": 2579 }, { "epoch": 1.0866208058402358, "grad_norm": 0.40102627873420715, "learning_rate": 8.048261249543354e-06, "loss": 0.4086, "step": 2580 }, { "epoch": 1.0870419766952126, "grad_norm": 0.4036369025707245, "learning_rate": 8.046318113009668e-06, "loss": 0.4142, "step": 2581 }, { "epoch": 1.0874631475501895, "grad_norm": 0.46865957975387573, "learning_rate": 8.044374244504017e-06, "loss": 0.486, "step": 2582 }, { "epoch": 1.0878843184051663, "grad_norm": 0.3872566223144531, "learning_rate": 8.042429644493479e-06, "loss": 0.4018, "step": 2583 }, { "epoch": 1.0883054892601431, "grad_norm": 0.45499253273010254, "learning_rate": 8.040484313445301e-06, "loss": 0.4782, "step": 2584 }, { "epoch": 1.08872666011512, "grad_norm": 0.425449937582016, "learning_rate": 8.038538251826914e-06, "loss": 0.4374, "step": 2585 }, { "epoch": 1.0891478309700968, "grad_norm": 0.4193176329135895, "learning_rate": 8.03659146010591e-06, "loss": 0.4368, "step": 2586 }, { "epoch": 1.0895690018250737, "grad_norm": 0.4124079942703247, "learning_rate": 8.034643938750078e-06, "loss": 0.411, "step": 2587 }, { "epoch": 1.0899901726800505, "grad_norm": 0.45749861001968384, "learning_rate": 8.032695688227361e-06, "loss": 0.4487, "step": 2588 }, { "epoch": 1.0904113435350273, "grad_norm": 0.4553833603858948, "learning_rate": 8.03074670900589e-06, "loss": 0.4548, "step": 2589 }, { "epoch": 1.0908325143900042, "grad_norm": 0.37954872846603394, "learning_rate": 8.028797001553967e-06, "loss": 0.3825, "step": 2590 }, { "epoch": 1.091253685244981, "grad_norm": 0.45568928122520447, "learning_rate": 8.026846566340069e-06, "loss": 0.4678, "step": 2591 }, { "epoch": 1.0916748560999578, "grad_norm": 0.4863811135292053, "learning_rate": 8.024895403832849e-06, "loss": 0.5078, "step": 2592 }, { "epoch": 1.0920960269549347, "grad_norm": 0.45950859785079956, "learning_rate": 8.022943514501133e-06, "loss": 0.455, "step": 2593 }, { "epoch": 1.0925171978099115, "grad_norm": 0.4423510730266571, "learning_rate": 8.020990898813922e-06, "loss": 0.4155, "step": 2594 }, { "epoch": 1.0929383686648884, "grad_norm": 0.444324791431427, "learning_rate": 8.019037557240394e-06, "loss": 0.4552, "step": 2595 }, { "epoch": 1.0933595395198652, "grad_norm": 0.4625264108181, "learning_rate": 8.0170834902499e-06, "loss": 0.5021, "step": 2596 }, { "epoch": 1.093780710374842, "grad_norm": 0.4264865219593048, "learning_rate": 8.015128698311962e-06, "loss": 0.4276, "step": 2597 }, { "epoch": 1.0942018812298189, "grad_norm": 0.5194075703620911, "learning_rate": 8.013173181896283e-06, "loss": 0.4678, "step": 2598 }, { "epoch": 1.0946230520847957, "grad_norm": 0.4724694788455963, "learning_rate": 8.011216941472732e-06, "loss": 0.4552, "step": 2599 }, { "epoch": 1.0950442229397725, "grad_norm": 0.41723722219467163, "learning_rate": 8.00925997751136e-06, "loss": 0.4094, "step": 2600 }, { "epoch": 1.0954653937947494, "grad_norm": 0.4439750909805298, "learning_rate": 8.007302290482386e-06, "loss": 0.4413, "step": 2601 }, { "epoch": 1.0958865646497262, "grad_norm": 0.4320763349533081, "learning_rate": 8.005343880856205e-06, "loss": 0.4152, "step": 2602 }, { "epoch": 1.096307735504703, "grad_norm": 0.49609434604644775, "learning_rate": 8.003384749103384e-06, "loss": 0.479, "step": 2603 }, { "epoch": 1.09672890635968, "grad_norm": 0.424389123916626, "learning_rate": 8.001424895694666e-06, "loss": 0.4737, "step": 2604 }, { "epoch": 1.0971500772146567, "grad_norm": 0.39055120944976807, "learning_rate": 7.99946432110097e-06, "loss": 0.4208, "step": 2605 }, { "epoch": 1.0975712480696336, "grad_norm": 0.4272053837776184, "learning_rate": 7.997503025793377e-06, "loss": 0.4198, "step": 2606 }, { "epoch": 1.0979924189246104, "grad_norm": 0.4517911374568939, "learning_rate": 7.995541010243155e-06, "loss": 0.4883, "step": 2607 }, { "epoch": 1.0984135897795873, "grad_norm": 0.4052363336086273, "learning_rate": 7.993578274921737e-06, "loss": 0.4236, "step": 2608 }, { "epoch": 1.098834760634564, "grad_norm": 0.4476255476474762, "learning_rate": 7.99161482030073e-06, "loss": 0.4443, "step": 2609 }, { "epoch": 1.099255931489541, "grad_norm": 0.46426066756248474, "learning_rate": 7.989650646851913e-06, "loss": 0.461, "step": 2610 }, { "epoch": 1.0996771023445178, "grad_norm": 0.3897700011730194, "learning_rate": 7.987685755047243e-06, "loss": 0.377, "step": 2611 }, { "epoch": 1.1000982731994946, "grad_norm": 0.41689375042915344, "learning_rate": 7.985720145358845e-06, "loss": 0.4915, "step": 2612 }, { "epoch": 1.1005194440544714, "grad_norm": 0.4447849988937378, "learning_rate": 7.983753818259016e-06, "loss": 0.4201, "step": 2613 }, { "epoch": 1.1009406149094483, "grad_norm": 0.3864457309246063, "learning_rate": 7.981786774220227e-06, "loss": 0.3915, "step": 2614 }, { "epoch": 1.1013617857644251, "grad_norm": 0.46271830797195435, "learning_rate": 7.979819013715122e-06, "loss": 0.454, "step": 2615 }, { "epoch": 1.101782956619402, "grad_norm": 0.43360066413879395, "learning_rate": 7.977850537216513e-06, "loss": 0.4756, "step": 2616 }, { "epoch": 1.1022041274743788, "grad_norm": 0.45918986201286316, "learning_rate": 7.975881345197394e-06, "loss": 0.5112, "step": 2617 }, { "epoch": 1.1026252983293556, "grad_norm": 0.41361916065216064, "learning_rate": 7.973911438130918e-06, "loss": 0.4372, "step": 2618 }, { "epoch": 1.1030464691843325, "grad_norm": 0.4249841868877411, "learning_rate": 7.971940816490418e-06, "loss": 0.4215, "step": 2619 }, { "epoch": 1.1034676400393093, "grad_norm": 0.46990442276000977, "learning_rate": 7.969969480749396e-06, "loss": 0.4302, "step": 2620 }, { "epoch": 1.1038888108942861, "grad_norm": 0.4820380210876465, "learning_rate": 7.967997431381526e-06, "loss": 0.4184, "step": 2621 }, { "epoch": 1.104309981749263, "grad_norm": 0.5682182312011719, "learning_rate": 7.966024668860658e-06, "loss": 0.4957, "step": 2622 }, { "epoch": 1.1047311526042398, "grad_norm": 0.4023670256137848, "learning_rate": 7.964051193660805e-06, "loss": 0.3399, "step": 2623 }, { "epoch": 1.1051523234592167, "grad_norm": 0.5360981225967407, "learning_rate": 7.962077006256155e-06, "loss": 0.5414, "step": 2624 }, { "epoch": 1.1055734943141935, "grad_norm": 0.44144949316978455, "learning_rate": 7.960102107121068e-06, "loss": 0.4548, "step": 2625 }, { "epoch": 1.1059946651691703, "grad_norm": 0.4270375370979309, "learning_rate": 7.958126496730075e-06, "loss": 0.4157, "step": 2626 }, { "epoch": 1.1064158360241472, "grad_norm": 0.41447702050209045, "learning_rate": 7.95615017555788e-06, "loss": 0.4412, "step": 2627 }, { "epoch": 1.106837006879124, "grad_norm": 0.44656747579574585, "learning_rate": 7.95417314407935e-06, "loss": 0.4642, "step": 2628 }, { "epoch": 1.1072581777341008, "grad_norm": 0.4831722676753998, "learning_rate": 7.952195402769531e-06, "loss": 0.4039, "step": 2629 }, { "epoch": 1.1076793485890777, "grad_norm": 0.4924216568470001, "learning_rate": 7.950216952103636e-06, "loss": 0.4866, "step": 2630 }, { "epoch": 1.1081005194440545, "grad_norm": 0.45314934849739075, "learning_rate": 7.948237792557049e-06, "loss": 0.4589, "step": 2631 }, { "epoch": 1.1085216902990314, "grad_norm": 0.4788627624511719, "learning_rate": 7.946257924605325e-06, "loss": 0.4726, "step": 2632 }, { "epoch": 1.1089428611540082, "grad_norm": 0.42368218302726746, "learning_rate": 7.944277348724186e-06, "loss": 0.4388, "step": 2633 }, { "epoch": 1.109364032008985, "grad_norm": 0.4393455386161804, "learning_rate": 7.942296065389528e-06, "loss": 0.4713, "step": 2634 }, { "epoch": 1.1097852028639619, "grad_norm": 0.4850354790687561, "learning_rate": 7.940314075077415e-06, "loss": 0.4426, "step": 2635 }, { "epoch": 1.1102063737189387, "grad_norm": 0.4647517502307892, "learning_rate": 7.938331378264082e-06, "loss": 0.4348, "step": 2636 }, { "epoch": 1.1106275445739155, "grad_norm": 0.4397004246711731, "learning_rate": 7.936347975425931e-06, "loss": 0.4617, "step": 2637 }, { "epoch": 1.1110487154288924, "grad_norm": 0.4584105610847473, "learning_rate": 7.934363867039539e-06, "loss": 0.4061, "step": 2638 }, { "epoch": 1.1114698862838692, "grad_norm": 0.4388509690761566, "learning_rate": 7.932379053581649e-06, "loss": 0.4241, "step": 2639 }, { "epoch": 1.111891057138846, "grad_norm": 0.5418239831924438, "learning_rate": 7.930393535529171e-06, "loss": 0.5108, "step": 2640 }, { "epoch": 1.112312227993823, "grad_norm": 0.4506611227989197, "learning_rate": 7.928407313359188e-06, "loss": 0.4563, "step": 2641 }, { "epoch": 1.1127333988487997, "grad_norm": 0.4468066394329071, "learning_rate": 7.926420387548951e-06, "loss": 0.4556, "step": 2642 }, { "epoch": 1.1131545697037766, "grad_norm": 0.4581035375595093, "learning_rate": 7.924432758575881e-06, "loss": 0.407, "step": 2643 }, { "epoch": 1.1135757405587534, "grad_norm": 0.49403810501098633, "learning_rate": 7.922444426917571e-06, "loss": 0.4606, "step": 2644 }, { "epoch": 1.1139969114137303, "grad_norm": 0.44413894414901733, "learning_rate": 7.920455393051772e-06, "loss": 0.4586, "step": 2645 }, { "epoch": 1.114418082268707, "grad_norm": 0.504414975643158, "learning_rate": 7.918465657456414e-06, "loss": 0.4674, "step": 2646 }, { "epoch": 1.114839253123684, "grad_norm": 0.4554251730442047, "learning_rate": 7.916475220609592e-06, "loss": 0.4929, "step": 2647 }, { "epoch": 1.1152604239786608, "grad_norm": 0.4003306031227112, "learning_rate": 7.91448408298957e-06, "loss": 0.3928, "step": 2648 }, { "epoch": 1.1156815948336376, "grad_norm": 0.4729273021221161, "learning_rate": 7.912492245074783e-06, "loss": 0.4434, "step": 2649 }, { "epoch": 1.1161027656886144, "grad_norm": 0.511999249458313, "learning_rate": 7.910499707343828e-06, "loss": 0.4656, "step": 2650 }, { "epoch": 1.1165239365435913, "grad_norm": 0.41990822553634644, "learning_rate": 7.908506470275474e-06, "loss": 0.4274, "step": 2651 }, { "epoch": 1.1169451073985681, "grad_norm": 0.445055216550827, "learning_rate": 7.906512534348659e-06, "loss": 0.4653, "step": 2652 }, { "epoch": 1.117366278253545, "grad_norm": 0.5234692692756653, "learning_rate": 7.904517900042487e-06, "loss": 0.5142, "step": 2653 }, { "epoch": 1.1177874491085218, "grad_norm": 0.4776843190193176, "learning_rate": 7.90252256783623e-06, "loss": 0.4109, "step": 2654 }, { "epoch": 1.1182086199634986, "grad_norm": 0.4714716970920563, "learning_rate": 7.90052653820933e-06, "loss": 0.4753, "step": 2655 }, { "epoch": 1.1186297908184755, "grad_norm": 0.44201791286468506, "learning_rate": 7.898529811641393e-06, "loss": 0.4236, "step": 2656 }, { "epoch": 1.119050961673452, "grad_norm": 0.4627060890197754, "learning_rate": 7.896532388612195e-06, "loss": 0.4666, "step": 2657 }, { "epoch": 1.1194721325284291, "grad_norm": 0.44788241386413574, "learning_rate": 7.894534269601678e-06, "loss": 0.4247, "step": 2658 }, { "epoch": 1.1198933033834058, "grad_norm": 0.47841188311576843, "learning_rate": 7.892535455089952e-06, "loss": 0.5037, "step": 2659 }, { "epoch": 1.1203144742383828, "grad_norm": 0.4315614104270935, "learning_rate": 7.890535945557295e-06, "loss": 0.4337, "step": 2660 }, { "epoch": 1.1207356450933594, "grad_norm": 0.4117092490196228, "learning_rate": 7.888535741484148e-06, "loss": 0.4656, "step": 2661 }, { "epoch": 1.1211568159483363, "grad_norm": 0.445451557636261, "learning_rate": 7.886534843351123e-06, "loss": 0.4305, "step": 2662 }, { "epoch": 1.121577986803313, "grad_norm": 0.5140566229820251, "learning_rate": 7.884533251639e-06, "loss": 0.5005, "step": 2663 }, { "epoch": 1.12199915765829, "grad_norm": 0.4342360198497772, "learning_rate": 7.88253096682872e-06, "loss": 0.408, "step": 2664 }, { "epoch": 1.1224203285132668, "grad_norm": 0.5022042393684387, "learning_rate": 7.880527989401397e-06, "loss": 0.4607, "step": 2665 }, { "epoch": 1.1228414993682436, "grad_norm": 0.4336934983730316, "learning_rate": 7.878524319838306e-06, "loss": 0.4327, "step": 2666 }, { "epoch": 1.1232626702232205, "grad_norm": 0.4726102650165558, "learning_rate": 7.876519958620888e-06, "loss": 0.4415, "step": 2667 }, { "epoch": 1.1236838410781973, "grad_norm": 0.44797587394714355, "learning_rate": 7.874514906230757e-06, "loss": 0.4927, "step": 2668 }, { "epoch": 1.1241050119331741, "grad_norm": 0.4276764392852783, "learning_rate": 7.872509163149688e-06, "loss": 0.4071, "step": 2669 }, { "epoch": 1.124526182788151, "grad_norm": 0.48551681637763977, "learning_rate": 7.870502729859621e-06, "loss": 0.4988, "step": 2670 }, { "epoch": 1.1249473536431278, "grad_norm": 0.44207024574279785, "learning_rate": 7.868495606842663e-06, "loss": 0.3738, "step": 2671 }, { "epoch": 1.1253685244981046, "grad_norm": 0.4424763023853302, "learning_rate": 7.86648779458109e-06, "loss": 0.4258, "step": 2672 }, { "epoch": 1.1257896953530815, "grad_norm": 0.5026807188987732, "learning_rate": 7.864479293557338e-06, "loss": 0.4937, "step": 2673 }, { "epoch": 1.1262108662080583, "grad_norm": 0.42296427488327026, "learning_rate": 7.862470104254016e-06, "loss": 0.4124, "step": 2674 }, { "epoch": 1.1266320370630352, "grad_norm": 0.47503405809402466, "learning_rate": 7.860460227153888e-06, "loss": 0.4479, "step": 2675 }, { "epoch": 1.127053207918012, "grad_norm": 0.4041804373264313, "learning_rate": 7.858449662739891e-06, "loss": 0.408, "step": 2676 }, { "epoch": 1.1274743787729888, "grad_norm": 0.45692288875579834, "learning_rate": 7.856438411495127e-06, "loss": 0.4415, "step": 2677 }, { "epoch": 1.1278955496279657, "grad_norm": 0.4939061403274536, "learning_rate": 7.854426473902859e-06, "loss": 0.4505, "step": 2678 }, { "epoch": 1.1283167204829425, "grad_norm": 0.46349915862083435, "learning_rate": 7.852413850446519e-06, "loss": 0.4773, "step": 2679 }, { "epoch": 1.1287378913379194, "grad_norm": 0.4215664863586426, "learning_rate": 7.850400541609699e-06, "loss": 0.4386, "step": 2680 }, { "epoch": 1.1291590621928962, "grad_norm": 0.4267936646938324, "learning_rate": 7.848386547876163e-06, "loss": 0.4435, "step": 2681 }, { "epoch": 1.129580233047873, "grad_norm": 0.4503641128540039, "learning_rate": 7.846371869729833e-06, "loss": 0.4148, "step": 2682 }, { "epoch": 1.1300014039028499, "grad_norm": 0.48930180072784424, "learning_rate": 7.844356507654796e-06, "loss": 0.4673, "step": 2683 }, { "epoch": 1.1304225747578267, "grad_norm": 0.4528590738773346, "learning_rate": 7.842340462135305e-06, "loss": 0.5007, "step": 2684 }, { "epoch": 1.1308437456128035, "grad_norm": 0.45980578660964966, "learning_rate": 7.84032373365578e-06, "loss": 0.4469, "step": 2685 }, { "epoch": 1.1312649164677804, "grad_norm": 0.413771390914917, "learning_rate": 7.8383063227008e-06, "loss": 0.4535, "step": 2686 }, { "epoch": 1.1316860873227572, "grad_norm": 0.4400777220726013, "learning_rate": 7.83628822975511e-06, "loss": 0.4297, "step": 2687 }, { "epoch": 1.132107258177734, "grad_norm": 0.44876059889793396, "learning_rate": 7.834269455303622e-06, "loss": 0.4871, "step": 2688 }, { "epoch": 1.132528429032711, "grad_norm": 0.4952660799026489, "learning_rate": 7.832249999831408e-06, "loss": 0.4977, "step": 2689 }, { "epoch": 1.1329495998876877, "grad_norm": 0.47397899627685547, "learning_rate": 7.830229863823701e-06, "loss": 0.4682, "step": 2690 }, { "epoch": 1.1333707707426646, "grad_norm": 0.3894510269165039, "learning_rate": 7.828209047765904e-06, "loss": 0.3869, "step": 2691 }, { "epoch": 1.1337919415976414, "grad_norm": 0.39851924777030945, "learning_rate": 7.826187552143581e-06, "loss": 0.4168, "step": 2692 }, { "epoch": 1.1342131124526182, "grad_norm": 0.437978595495224, "learning_rate": 7.824165377442458e-06, "loss": 0.4537, "step": 2693 }, { "epoch": 1.134634283307595, "grad_norm": 0.41240620613098145, "learning_rate": 7.822142524148422e-06, "loss": 0.446, "step": 2694 }, { "epoch": 1.135055454162572, "grad_norm": 0.480893075466156, "learning_rate": 7.82011899274753e-06, "loss": 0.4149, "step": 2695 }, { "epoch": 1.1354766250175488, "grad_norm": 0.4805780351161957, "learning_rate": 7.818094783725999e-06, "loss": 0.4616, "step": 2696 }, { "epoch": 1.1358977958725256, "grad_norm": 0.42280834913253784, "learning_rate": 7.8160698975702e-06, "loss": 0.4322, "step": 2697 }, { "epoch": 1.1363189667275024, "grad_norm": 0.4450260400772095, "learning_rate": 7.814044334766684e-06, "loss": 0.4536, "step": 2698 }, { "epoch": 1.1367401375824793, "grad_norm": 0.4222185015678406, "learning_rate": 7.812018095802147e-06, "loss": 0.3302, "step": 2699 }, { "epoch": 1.137161308437456, "grad_norm": 0.5828087329864502, "learning_rate": 7.809991181163461e-06, "loss": 0.5272, "step": 2700 }, { "epoch": 1.137582479292433, "grad_norm": 0.4562770426273346, "learning_rate": 7.807963591337652e-06, "loss": 0.4612, "step": 2701 }, { "epoch": 1.1380036501474098, "grad_norm": 0.4877515733242035, "learning_rate": 7.805935326811913e-06, "loss": 0.4769, "step": 2702 }, { "epoch": 1.1384248210023866, "grad_norm": 0.44208452105522156, "learning_rate": 7.803906388073596e-06, "loss": 0.4568, "step": 2703 }, { "epoch": 1.1388459918573635, "grad_norm": 0.43357783555984497, "learning_rate": 7.801876775610213e-06, "loss": 0.4105, "step": 2704 }, { "epoch": 1.1392671627123403, "grad_norm": 0.44766804575920105, "learning_rate": 7.799846489909447e-06, "loss": 0.3948, "step": 2705 }, { "epoch": 1.1396883335673171, "grad_norm": 0.4262852370738983, "learning_rate": 7.797815531459135e-06, "loss": 0.4405, "step": 2706 }, { "epoch": 1.140109504422294, "grad_norm": 0.4483799636363983, "learning_rate": 7.795783900747273e-06, "loss": 0.4487, "step": 2707 }, { "epoch": 1.1405306752772708, "grad_norm": 0.45307648181915283, "learning_rate": 7.79375159826203e-06, "loss": 0.4467, "step": 2708 }, { "epoch": 1.1409518461322476, "grad_norm": 0.43186599016189575, "learning_rate": 7.791718624491725e-06, "loss": 0.4513, "step": 2709 }, { "epoch": 1.1413730169872245, "grad_norm": 0.4271083474159241, "learning_rate": 7.789684979924841e-06, "loss": 0.4029, "step": 2710 }, { "epoch": 1.1417941878422013, "grad_norm": 0.47024762630462646, "learning_rate": 7.787650665050028e-06, "loss": 0.4941, "step": 2711 }, { "epoch": 1.1422153586971782, "grad_norm": 0.44164612889289856, "learning_rate": 7.78561568035609e-06, "loss": 0.4178, "step": 2712 }, { "epoch": 1.142636529552155, "grad_norm": 0.4423764944076538, "learning_rate": 7.783580026332001e-06, "loss": 0.4347, "step": 2713 }, { "epoch": 1.1430577004071318, "grad_norm": 0.4819129705429077, "learning_rate": 7.781543703466881e-06, "loss": 0.5093, "step": 2714 }, { "epoch": 1.1434788712621087, "grad_norm": 0.4448574185371399, "learning_rate": 7.779506712250024e-06, "loss": 0.4021, "step": 2715 }, { "epoch": 1.1439000421170855, "grad_norm": 0.4354919493198395, "learning_rate": 7.77746905317088e-06, "loss": 0.457, "step": 2716 }, { "epoch": 1.1443212129720624, "grad_norm": 0.4266113042831421, "learning_rate": 7.775430726719058e-06, "loss": 0.4561, "step": 2717 }, { "epoch": 1.1447423838270392, "grad_norm": 0.45353999733924866, "learning_rate": 7.77339173338433e-06, "loss": 0.4386, "step": 2718 }, { "epoch": 1.145163554682016, "grad_norm": 0.4760096073150635, "learning_rate": 7.771352073656628e-06, "loss": 0.4067, "step": 2719 }, { "epoch": 1.1455847255369929, "grad_norm": 0.44160088896751404, "learning_rate": 7.769311748026041e-06, "loss": 0.4201, "step": 2720 }, { "epoch": 1.1460058963919697, "grad_norm": 0.44044220447540283, "learning_rate": 7.767270756982824e-06, "loss": 0.4446, "step": 2721 }, { "epoch": 1.1464270672469465, "grad_norm": 0.45883694291114807, "learning_rate": 7.76522910101738e-06, "loss": 0.4652, "step": 2722 }, { "epoch": 1.1468482381019234, "grad_norm": 0.4145326018333435, "learning_rate": 7.76318678062029e-06, "loss": 0.3889, "step": 2723 }, { "epoch": 1.1472694089569002, "grad_norm": 0.432822048664093, "learning_rate": 7.761143796282277e-06, "loss": 0.4397, "step": 2724 }, { "epoch": 1.147690579811877, "grad_norm": 0.4508892297744751, "learning_rate": 7.759100148494237e-06, "loss": 0.4527, "step": 2725 }, { "epoch": 1.148111750666854, "grad_norm": 0.4317128360271454, "learning_rate": 7.757055837747214e-06, "loss": 0.4312, "step": 2726 }, { "epoch": 1.1485329215218307, "grad_norm": 0.48238706588745117, "learning_rate": 7.755010864532418e-06, "loss": 0.4664, "step": 2727 }, { "epoch": 1.1489540923768076, "grad_norm": 0.4279309809207916, "learning_rate": 7.75296522934122e-06, "loss": 0.3935, "step": 2728 }, { "epoch": 1.1493752632317844, "grad_norm": 0.468429833650589, "learning_rate": 7.750918932665144e-06, "loss": 0.4711, "step": 2729 }, { "epoch": 1.1497964340867612, "grad_norm": 0.4933050572872162, "learning_rate": 7.748871974995877e-06, "loss": 0.5012, "step": 2730 }, { "epoch": 1.150217604941738, "grad_norm": 0.46582451462745667, "learning_rate": 7.746824356825264e-06, "loss": 0.4604, "step": 2731 }, { "epoch": 1.150638775796715, "grad_norm": 0.42327460646629333, "learning_rate": 7.744776078645308e-06, "loss": 0.4288, "step": 2732 }, { "epoch": 1.1510599466516918, "grad_norm": 0.5281771421432495, "learning_rate": 7.742727140948173e-06, "loss": 0.4804, "step": 2733 }, { "epoch": 1.1514811175066686, "grad_norm": 0.39676910638809204, "learning_rate": 7.740677544226176e-06, "loss": 0.3914, "step": 2734 }, { "epoch": 1.1519022883616454, "grad_norm": 0.45556753873825073, "learning_rate": 7.738627288971798e-06, "loss": 0.5262, "step": 2735 }, { "epoch": 1.1523234592166223, "grad_norm": 0.41903749108314514, "learning_rate": 7.736576375677676e-06, "loss": 0.4121, "step": 2736 }, { "epoch": 1.152744630071599, "grad_norm": 0.5782936215400696, "learning_rate": 7.734524804836604e-06, "loss": 0.5546, "step": 2737 }, { "epoch": 1.153165800926576, "grad_norm": 0.404318243265152, "learning_rate": 7.732472576941538e-06, "loss": 0.4154, "step": 2738 }, { "epoch": 1.1535869717815528, "grad_norm": 0.43866634368896484, "learning_rate": 7.730419692485587e-06, "loss": 0.4463, "step": 2739 }, { "epoch": 1.1540081426365296, "grad_norm": 0.4755026400089264, "learning_rate": 7.72836615196202e-06, "loss": 0.4873, "step": 2740 }, { "epoch": 1.1544293134915065, "grad_norm": 0.4092082679271698, "learning_rate": 7.726311955864261e-06, "loss": 0.3938, "step": 2741 }, { "epoch": 1.1548504843464833, "grad_norm": 0.5120775699615479, "learning_rate": 7.724257104685899e-06, "loss": 0.4805, "step": 2742 }, { "epoch": 1.1552716552014601, "grad_norm": 0.430968314409256, "learning_rate": 7.722201598920673e-06, "loss": 0.4426, "step": 2743 }, { "epoch": 1.155692826056437, "grad_norm": 0.428523987531662, "learning_rate": 7.720145439062483e-06, "loss": 0.4745, "step": 2744 }, { "epoch": 1.1561139969114138, "grad_norm": 0.524426281452179, "learning_rate": 7.718088625605382e-06, "loss": 0.5325, "step": 2745 }, { "epoch": 1.1565351677663906, "grad_norm": 0.41409775614738464, "learning_rate": 7.716031159043587e-06, "loss": 0.407, "step": 2746 }, { "epoch": 1.1569563386213675, "grad_norm": 0.512553870677948, "learning_rate": 7.713973039871463e-06, "loss": 0.4655, "step": 2747 }, { "epoch": 1.1573775094763443, "grad_norm": 0.4807262122631073, "learning_rate": 7.71191426858354e-06, "loss": 0.493, "step": 2748 }, { "epoch": 1.1577986803313212, "grad_norm": 0.4384244680404663, "learning_rate": 7.709854845674501e-06, "loss": 0.4297, "step": 2749 }, { "epoch": 1.158219851186298, "grad_norm": 0.46906745433807373, "learning_rate": 7.707794771639186e-06, "loss": 0.4507, "step": 2750 }, { "epoch": 1.1586410220412748, "grad_norm": 0.48578527569770813, "learning_rate": 7.705734046972588e-06, "loss": 0.4457, "step": 2751 }, { "epoch": 1.1590621928962517, "grad_norm": 0.485787570476532, "learning_rate": 7.703672672169864e-06, "loss": 0.4555, "step": 2752 }, { "epoch": 1.1594833637512285, "grad_norm": 0.4417814612388611, "learning_rate": 7.701610647726323e-06, "loss": 0.4656, "step": 2753 }, { "epoch": 1.1599045346062051, "grad_norm": 0.48108577728271484, "learning_rate": 7.699547974137427e-06, "loss": 0.4713, "step": 2754 }, { "epoch": 1.1603257054611822, "grad_norm": 0.5128677487373352, "learning_rate": 7.6974846518988e-06, "loss": 0.471, "step": 2755 }, { "epoch": 1.1607468763161588, "grad_norm": 0.4676459729671478, "learning_rate": 7.695420681506215e-06, "loss": 0.4546, "step": 2756 }, { "epoch": 1.1611680471711359, "grad_norm": 0.4297386407852173, "learning_rate": 7.693356063455608e-06, "loss": 0.4332, "step": 2757 }, { "epoch": 1.1615892180261125, "grad_norm": 0.4534546136856079, "learning_rate": 7.691290798243067e-06, "loss": 0.4651, "step": 2758 }, { "epoch": 1.1620103888810895, "grad_norm": 0.3905220329761505, "learning_rate": 7.689224886364834e-06, "loss": 0.4182, "step": 2759 }, { "epoch": 1.1624315597360662, "grad_norm": 0.4404107332229614, "learning_rate": 7.68715832831731e-06, "loss": 0.4257, "step": 2760 }, { "epoch": 1.1628527305910432, "grad_norm": 0.44589802622795105, "learning_rate": 7.685091124597048e-06, "loss": 0.4622, "step": 2761 }, { "epoch": 1.1632739014460198, "grad_norm": 0.46020203828811646, "learning_rate": 7.683023275700756e-06, "loss": 0.4471, "step": 2762 }, { "epoch": 1.163695072300997, "grad_norm": 0.4873010218143463, "learning_rate": 7.680954782125303e-06, "loss": 0.4832, "step": 2763 }, { "epoch": 1.1641162431559735, "grad_norm": 0.4688366949558258, "learning_rate": 7.678885644367703e-06, "loss": 0.4564, "step": 2764 }, { "epoch": 1.1645374140109506, "grad_norm": 0.48740583658218384, "learning_rate": 7.676815862925137e-06, "loss": 0.4739, "step": 2765 }, { "epoch": 1.1649585848659272, "grad_norm": 0.4678560793399811, "learning_rate": 7.674745438294926e-06, "loss": 0.4139, "step": 2766 }, { "epoch": 1.165379755720904, "grad_norm": 0.5404622554779053, "learning_rate": 7.672674370974558e-06, "loss": 0.496, "step": 2767 }, { "epoch": 1.1658009265758809, "grad_norm": 0.49611082673072815, "learning_rate": 7.670602661461672e-06, "loss": 0.4403, "step": 2768 }, { "epoch": 1.1662220974308577, "grad_norm": 0.4500599503517151, "learning_rate": 7.668530310254059e-06, "loss": 0.4508, "step": 2769 }, { "epoch": 1.1666432682858345, "grad_norm": 0.4579768180847168, "learning_rate": 7.666457317849663e-06, "loss": 0.4514, "step": 2770 }, { "epoch": 1.1670644391408114, "grad_norm": 0.4471321702003479, "learning_rate": 7.664383684746585e-06, "loss": 0.436, "step": 2771 }, { "epoch": 1.1674856099957882, "grad_norm": 0.4519810378551483, "learning_rate": 7.662309411443084e-06, "loss": 0.4707, "step": 2772 }, { "epoch": 1.167906780850765, "grad_norm": 0.45864978432655334, "learning_rate": 7.660234498437562e-06, "loss": 0.4551, "step": 2773 }, { "epoch": 1.1683279517057419, "grad_norm": 0.4243900179862976, "learning_rate": 7.658158946228585e-06, "loss": 0.4307, "step": 2774 }, { "epoch": 1.1687491225607187, "grad_norm": 0.4549727439880371, "learning_rate": 7.656082755314866e-06, "loss": 0.4761, "step": 2775 }, { "epoch": 1.1691702934156956, "grad_norm": 0.41239988803863525, "learning_rate": 7.654005926195276e-06, "loss": 0.4006, "step": 2776 }, { "epoch": 1.1695914642706724, "grad_norm": 0.4540138244628906, "learning_rate": 7.651928459368834e-06, "loss": 0.4252, "step": 2777 }, { "epoch": 1.1700126351256492, "grad_norm": 0.5172944664955139, "learning_rate": 7.64985035533472e-06, "loss": 0.49, "step": 2778 }, { "epoch": 1.170433805980626, "grad_norm": 0.4530699849128723, "learning_rate": 7.64777161459226e-06, "loss": 0.457, "step": 2779 }, { "epoch": 1.170854976835603, "grad_norm": 0.3904195725917816, "learning_rate": 7.645692237640938e-06, "loss": 0.3789, "step": 2780 }, { "epoch": 1.1712761476905797, "grad_norm": 0.4957123100757599, "learning_rate": 7.643612224980384e-06, "loss": 0.4704, "step": 2781 }, { "epoch": 1.1716973185455566, "grad_norm": 0.4744502007961273, "learning_rate": 7.641531577110389e-06, "loss": 0.4627, "step": 2782 }, { "epoch": 1.1721184894005334, "grad_norm": 0.47939372062683105, "learning_rate": 7.639450294530893e-06, "loss": 0.4711, "step": 2783 }, { "epoch": 1.1725396602555103, "grad_norm": 0.4368848502635956, "learning_rate": 7.637368377741985e-06, "loss": 0.3981, "step": 2784 }, { "epoch": 1.172960831110487, "grad_norm": 0.48645779490470886, "learning_rate": 7.635285827243912e-06, "loss": 0.4825, "step": 2785 }, { "epoch": 1.173382001965464, "grad_norm": 0.4237697720527649, "learning_rate": 7.633202643537072e-06, "loss": 0.3937, "step": 2786 }, { "epoch": 1.1738031728204408, "grad_norm": 0.4745596647262573, "learning_rate": 7.631118827122013e-06, "loss": 0.4823, "step": 2787 }, { "epoch": 1.1742243436754176, "grad_norm": 0.42053768038749695, "learning_rate": 7.629034378499437e-06, "loss": 0.4225, "step": 2788 }, { "epoch": 1.1746455145303945, "grad_norm": 0.4603900909423828, "learning_rate": 7.626949298170196e-06, "loss": 0.4523, "step": 2789 }, { "epoch": 1.1750666853853713, "grad_norm": 0.48531851172447205, "learning_rate": 7.624863586635294e-06, "loss": 0.4739, "step": 2790 }, { "epoch": 1.1754878562403481, "grad_norm": 0.4550316333770752, "learning_rate": 7.6227772443958915e-06, "loss": 0.4338, "step": 2791 }, { "epoch": 1.175909027095325, "grad_norm": 0.4418211579322815, "learning_rate": 7.620690271953293e-06, "loss": 0.439, "step": 2792 }, { "epoch": 1.1763301979503018, "grad_norm": 0.4837694764137268, "learning_rate": 7.6186026698089584e-06, "loss": 0.4907, "step": 2793 }, { "epoch": 1.1767513688052786, "grad_norm": 0.43993985652923584, "learning_rate": 7.616514438464499e-06, "loss": 0.4481, "step": 2794 }, { "epoch": 1.1771725396602555, "grad_norm": 0.49169406294822693, "learning_rate": 7.614425578421681e-06, "loss": 0.462, "step": 2795 }, { "epoch": 1.1775937105152323, "grad_norm": 0.48937752842903137, "learning_rate": 7.61233609018241e-06, "loss": 0.4729, "step": 2796 }, { "epoch": 1.1780148813702092, "grad_norm": 0.4221460521221161, "learning_rate": 7.6102459742487555e-06, "loss": 0.4327, "step": 2797 }, { "epoch": 1.178436052225186, "grad_norm": 0.4151066243648529, "learning_rate": 7.608155231122929e-06, "loss": 0.4399, "step": 2798 }, { "epoch": 1.1788572230801628, "grad_norm": 0.47934821248054504, "learning_rate": 7.606063861307298e-06, "loss": 0.4549, "step": 2799 }, { "epoch": 1.1792783939351397, "grad_norm": 0.4214365482330322, "learning_rate": 7.603971865304379e-06, "loss": 0.4177, "step": 2800 }, { "epoch": 1.1796995647901165, "grad_norm": 0.4392748177051544, "learning_rate": 7.601879243616838e-06, "loss": 0.4285, "step": 2801 }, { "epoch": 1.1801207356450933, "grad_norm": 0.45970696210861206, "learning_rate": 7.5997859967474906e-06, "loss": 0.469, "step": 2802 }, { "epoch": 1.1805419065000702, "grad_norm": 0.3873283267021179, "learning_rate": 7.597692125199307e-06, "loss": 0.3964, "step": 2803 }, { "epoch": 1.180963077355047, "grad_norm": 0.4451901316642761, "learning_rate": 7.595597629475402e-06, "loss": 0.4259, "step": 2804 }, { "epoch": 1.1813842482100239, "grad_norm": 0.45559442043304443, "learning_rate": 7.593502510079045e-06, "loss": 0.4785, "step": 2805 }, { "epoch": 1.1818054190650007, "grad_norm": 0.47520002722740173, "learning_rate": 7.59140676751365e-06, "loss": 0.4835, "step": 2806 }, { "epoch": 1.1822265899199775, "grad_norm": 0.46773210167884827, "learning_rate": 7.589310402282786e-06, "loss": 0.4724, "step": 2807 }, { "epoch": 1.1826477607749544, "grad_norm": 0.45271173119544983, "learning_rate": 7.587213414890169e-06, "loss": 0.4568, "step": 2808 }, { "epoch": 1.1830689316299312, "grad_norm": 0.4094722270965576, "learning_rate": 7.5851158058396655e-06, "loss": 0.3892, "step": 2809 }, { "epoch": 1.183490102484908, "grad_norm": 0.42542773485183716, "learning_rate": 7.58301757563529e-06, "loss": 0.4411, "step": 2810 }, { "epoch": 1.1839112733398849, "grad_norm": 0.4744713306427002, "learning_rate": 7.580918724781208e-06, "loss": 0.4769, "step": 2811 }, { "epoch": 1.1843324441948617, "grad_norm": 0.4582969844341278, "learning_rate": 7.578819253781731e-06, "loss": 0.443, "step": 2812 }, { "epoch": 1.1847536150498386, "grad_norm": 0.4498349130153656, "learning_rate": 7.576719163141325e-06, "loss": 0.4838, "step": 2813 }, { "epoch": 1.1851747859048154, "grad_norm": 0.4051441252231598, "learning_rate": 7.574618453364598e-06, "loss": 0.4012, "step": 2814 }, { "epoch": 1.1855959567597922, "grad_norm": 0.4957577884197235, "learning_rate": 7.572517124956314e-06, "loss": 0.4514, "step": 2815 }, { "epoch": 1.186017127614769, "grad_norm": 0.47222617268562317, "learning_rate": 7.57041517842138e-06, "loss": 0.498, "step": 2816 }, { "epoch": 1.186438298469746, "grad_norm": 0.42818206548690796, "learning_rate": 7.568312614264853e-06, "loss": 0.3748, "step": 2817 }, { "epoch": 1.1868594693247227, "grad_norm": 0.43639734387397766, "learning_rate": 7.56620943299194e-06, "loss": 0.4224, "step": 2818 }, { "epoch": 1.1872806401796996, "grad_norm": 0.467987596988678, "learning_rate": 7.564105635107997e-06, "loss": 0.4696, "step": 2819 }, { "epoch": 1.1877018110346764, "grad_norm": 0.47320863604545593, "learning_rate": 7.562001221118523e-06, "loss": 0.4231, "step": 2820 }, { "epoch": 1.1881229818896533, "grad_norm": 0.43984442949295044, "learning_rate": 7.559896191529169e-06, "loss": 0.4807, "step": 2821 }, { "epoch": 1.18854415274463, "grad_norm": 0.44024327397346497, "learning_rate": 7.5577905468457345e-06, "loss": 0.4418, "step": 2822 }, { "epoch": 1.188965323599607, "grad_norm": 0.45150530338287354, "learning_rate": 7.555684287574165e-06, "loss": 0.4759, "step": 2823 }, { "epoch": 1.1893864944545838, "grad_norm": 0.4327484965324402, "learning_rate": 7.553577414220557e-06, "loss": 0.4517, "step": 2824 }, { "epoch": 1.1898076653095606, "grad_norm": 0.4377991855144501, "learning_rate": 7.551469927291146e-06, "loss": 0.4258, "step": 2825 }, { "epoch": 1.1902288361645375, "grad_norm": 0.4099012017250061, "learning_rate": 7.549361827292325e-06, "loss": 0.4272, "step": 2826 }, { "epoch": 1.1906500070195143, "grad_norm": 0.4666547477245331, "learning_rate": 7.547253114730632e-06, "loss": 0.4763, "step": 2827 }, { "epoch": 1.1910711778744911, "grad_norm": 0.4661492109298706, "learning_rate": 7.545143790112746e-06, "loss": 0.5056, "step": 2828 }, { "epoch": 1.191492348729468, "grad_norm": 0.3748950958251953, "learning_rate": 7.543033853945498e-06, "loss": 0.3616, "step": 2829 }, { "epoch": 1.1919135195844448, "grad_norm": 0.6129472851753235, "learning_rate": 7.540923306735868e-06, "loss": 0.5782, "step": 2830 }, { "epoch": 1.1923346904394216, "grad_norm": 0.368692010641098, "learning_rate": 7.538812148990977e-06, "loss": 0.3795, "step": 2831 }, { "epoch": 1.1927558612943985, "grad_norm": 0.45208919048309326, "learning_rate": 7.536700381218098e-06, "loss": 0.4658, "step": 2832 }, { "epoch": 1.1931770321493753, "grad_norm": 0.4144175350666046, "learning_rate": 7.534588003924645e-06, "loss": 0.4547, "step": 2833 }, { "epoch": 1.1935982030043522, "grad_norm": 0.413536012172699, "learning_rate": 7.532475017618188e-06, "loss": 0.4167, "step": 2834 }, { "epoch": 1.194019373859329, "grad_norm": 0.48253050446510315, "learning_rate": 7.530361422806433e-06, "loss": 0.4941, "step": 2835 }, { "epoch": 1.1944405447143058, "grad_norm": 0.455451101064682, "learning_rate": 7.528247219997235e-06, "loss": 0.4768, "step": 2836 }, { "epoch": 1.1948617155692827, "grad_norm": 0.47777917981147766, "learning_rate": 7.526132409698601e-06, "loss": 0.4055, "step": 2837 }, { "epoch": 1.1952828864242595, "grad_norm": 0.5202696919441223, "learning_rate": 7.524016992418676e-06, "loss": 0.4739, "step": 2838 }, { "epoch": 1.1957040572792363, "grad_norm": 0.4032008945941925, "learning_rate": 7.521900968665754e-06, "loss": 0.4091, "step": 2839 }, { "epoch": 1.1961252281342132, "grad_norm": 0.4951149821281433, "learning_rate": 7.51978433894828e-06, "loss": 0.5, "step": 2840 }, { "epoch": 1.19654639898919, "grad_norm": 0.4177343249320984, "learning_rate": 7.5176671037748325e-06, "loss": 0.3976, "step": 2841 }, { "epoch": 1.1969675698441669, "grad_norm": 0.41100960969924927, "learning_rate": 7.515549263654147e-06, "loss": 0.4472, "step": 2842 }, { "epoch": 1.1973887406991437, "grad_norm": 0.45217835903167725, "learning_rate": 7.513430819095098e-06, "loss": 0.4452, "step": 2843 }, { "epoch": 1.1978099115541205, "grad_norm": 0.4638887941837311, "learning_rate": 7.511311770606711e-06, "loss": 0.4335, "step": 2844 }, { "epoch": 1.1982310824090974, "grad_norm": 0.4038792550563812, "learning_rate": 7.509192118698146e-06, "loss": 0.4257, "step": 2845 }, { "epoch": 1.1986522532640742, "grad_norm": 0.43293243646621704, "learning_rate": 7.507071863878721e-06, "loss": 0.4726, "step": 2846 }, { "epoch": 1.199073424119051, "grad_norm": 0.46311935782432556, "learning_rate": 7.504951006657893e-06, "loss": 0.4806, "step": 2847 }, { "epoch": 1.1994945949740279, "grad_norm": 0.4589502513408661, "learning_rate": 7.502829547545256e-06, "loss": 0.4434, "step": 2848 }, { "epoch": 1.1999157658290047, "grad_norm": 0.5037673711776733, "learning_rate": 7.500707487050565e-06, "loss": 0.4644, "step": 2849 }, { "epoch": 1.2003369366839816, "grad_norm": 0.3940584063529968, "learning_rate": 7.498584825683704e-06, "loss": 0.4035, "step": 2850 }, { "epoch": 1.2007581075389584, "grad_norm": 0.4544983208179474, "learning_rate": 7.496461563954711e-06, "loss": 0.4892, "step": 2851 }, { "epoch": 1.2011792783939352, "grad_norm": 0.43735644221305847, "learning_rate": 7.494337702373765e-06, "loss": 0.4488, "step": 2852 }, { "epoch": 1.2016004492489119, "grad_norm": 0.4178258180618286, "learning_rate": 7.492213241451188e-06, "loss": 0.3919, "step": 2853 }, { "epoch": 1.202021620103889, "grad_norm": 0.4142080545425415, "learning_rate": 7.49008818169745e-06, "loss": 0.4459, "step": 2854 }, { "epoch": 1.2024427909588655, "grad_norm": 0.506071925163269, "learning_rate": 7.487962523623159e-06, "loss": 0.502, "step": 2855 }, { "epoch": 1.2028639618138426, "grad_norm": 0.4077194929122925, "learning_rate": 7.485836267739072e-06, "loss": 0.4103, "step": 2856 }, { "epoch": 1.2032851326688192, "grad_norm": 0.41428086161613464, "learning_rate": 7.483709414556086e-06, "loss": 0.4218, "step": 2857 }, { "epoch": 1.2037063035237963, "grad_norm": 0.44808289408683777, "learning_rate": 7.481581964585245e-06, "loss": 0.4677, "step": 2858 }, { "epoch": 1.2041274743787729, "grad_norm": 0.47007420659065247, "learning_rate": 7.479453918337733e-06, "loss": 0.4874, "step": 2859 }, { "epoch": 1.20454864523375, "grad_norm": 0.4839390218257904, "learning_rate": 7.4773252763248805e-06, "loss": 0.4535, "step": 2860 }, { "epoch": 1.2049698160887266, "grad_norm": 0.4596876800060272, "learning_rate": 7.475196039058157e-06, "loss": 0.4535, "step": 2861 }, { "epoch": 1.2053909869437036, "grad_norm": 0.529753565788269, "learning_rate": 7.47306620704918e-06, "loss": 0.5086, "step": 2862 }, { "epoch": 1.2058121577986802, "grad_norm": 0.3936573266983032, "learning_rate": 7.470935780809705e-06, "loss": 0.3799, "step": 2863 }, { "epoch": 1.2062333286536573, "grad_norm": 0.4763331115245819, "learning_rate": 7.468804760851636e-06, "loss": 0.4721, "step": 2864 }, { "epoch": 1.206654499508634, "grad_norm": 0.4382491409778595, "learning_rate": 7.466673147687012e-06, "loss": 0.435, "step": 2865 }, { "epoch": 1.2070756703636107, "grad_norm": 0.44718873500823975, "learning_rate": 7.464540941828023e-06, "loss": 0.4197, "step": 2866 }, { "epoch": 1.2074968412185876, "grad_norm": 0.428705632686615, "learning_rate": 7.462408143786995e-06, "loss": 0.4326, "step": 2867 }, { "epoch": 1.2079180120735644, "grad_norm": 0.44409745931625366, "learning_rate": 7.460274754076397e-06, "loss": 0.439, "step": 2868 }, { "epoch": 1.2083391829285413, "grad_norm": 0.4626326262950897, "learning_rate": 7.458140773208846e-06, "loss": 0.453, "step": 2869 }, { "epoch": 1.208760353783518, "grad_norm": 0.41703200340270996, "learning_rate": 7.456006201697092e-06, "loss": 0.4018, "step": 2870 }, { "epoch": 1.209181524638495, "grad_norm": 0.43474671244621277, "learning_rate": 7.453871040054037e-06, "loss": 0.458, "step": 2871 }, { "epoch": 1.2096026954934718, "grad_norm": 0.49706485867500305, "learning_rate": 7.451735288792716e-06, "loss": 0.4921, "step": 2872 }, { "epoch": 1.2100238663484486, "grad_norm": 0.38442909717559814, "learning_rate": 7.449598948426309e-06, "loss": 0.4258, "step": 2873 }, { "epoch": 1.2104450372034254, "grad_norm": 0.4249861538410187, "learning_rate": 7.447462019468139e-06, "loss": 0.4473, "step": 2874 }, { "epoch": 1.2108662080584023, "grad_norm": 0.4420650005340576, "learning_rate": 7.445324502431668e-06, "loss": 0.4949, "step": 2875 }, { "epoch": 1.2112873789133791, "grad_norm": 0.44827163219451904, "learning_rate": 7.443186397830502e-06, "loss": 0.4625, "step": 2876 }, { "epoch": 1.211708549768356, "grad_norm": 0.4696606695652008, "learning_rate": 7.441047706178385e-06, "loss": 0.4123, "step": 2877 }, { "epoch": 1.2121297206233328, "grad_norm": 0.40886184573173523, "learning_rate": 7.438908427989204e-06, "loss": 0.4059, "step": 2878 }, { "epoch": 1.2125508914783096, "grad_norm": 0.4332182705402374, "learning_rate": 7.436768563776989e-06, "loss": 0.4354, "step": 2879 }, { "epoch": 1.2129720623332865, "grad_norm": 0.4698430895805359, "learning_rate": 7.434628114055906e-06, "loss": 0.4878, "step": 2880 }, { "epoch": 1.2133932331882633, "grad_norm": 0.4210549294948578, "learning_rate": 7.432487079340262e-06, "loss": 0.4061, "step": 2881 }, { "epoch": 1.2138144040432401, "grad_norm": 0.5087559819221497, "learning_rate": 7.430345460144512e-06, "loss": 0.5162, "step": 2882 }, { "epoch": 1.214235574898217, "grad_norm": 0.49168500304222107, "learning_rate": 7.428203256983244e-06, "loss": 0.5038, "step": 2883 }, { "epoch": 1.2146567457531938, "grad_norm": 0.43314129114151, "learning_rate": 7.426060470371186e-06, "loss": 0.4204, "step": 2884 }, { "epoch": 1.2150779166081707, "grad_norm": 0.42368054389953613, "learning_rate": 7.423917100823211e-06, "loss": 0.4125, "step": 2885 }, { "epoch": 1.2154990874631475, "grad_norm": 0.4621540307998657, "learning_rate": 7.4217731488543295e-06, "loss": 0.4832, "step": 2886 }, { "epoch": 1.2159202583181243, "grad_norm": 0.44303739070892334, "learning_rate": 7.419628614979693e-06, "loss": 0.4134, "step": 2887 }, { "epoch": 1.2163414291731012, "grad_norm": 0.45883437991142273, "learning_rate": 7.417483499714589e-06, "loss": 0.4399, "step": 2888 }, { "epoch": 1.216762600028078, "grad_norm": 0.48165664076805115, "learning_rate": 7.415337803574449e-06, "loss": 0.4304, "step": 2889 }, { "epoch": 1.2171837708830548, "grad_norm": 0.4291688799858093, "learning_rate": 7.413191527074845e-06, "loss": 0.4675, "step": 2890 }, { "epoch": 1.2176049417380317, "grad_norm": 0.42550230026245117, "learning_rate": 7.4110446707314844e-06, "loss": 0.4383, "step": 2891 }, { "epoch": 1.2180261125930085, "grad_norm": 0.4405587315559387, "learning_rate": 7.408897235060214e-06, "loss": 0.4037, "step": 2892 }, { "epoch": 1.2184472834479854, "grad_norm": 0.5173574686050415, "learning_rate": 7.406749220577024e-06, "loss": 0.5505, "step": 2893 }, { "epoch": 1.2188684543029622, "grad_norm": 0.32757100462913513, "learning_rate": 7.404600627798041e-06, "loss": 0.3233, "step": 2894 }, { "epoch": 1.219289625157939, "grad_norm": 0.43573611974716187, "learning_rate": 7.40245145723953e-06, "loss": 0.467, "step": 2895 }, { "epoch": 1.2197107960129159, "grad_norm": 0.4798855483531952, "learning_rate": 7.400301709417894e-06, "loss": 0.5232, "step": 2896 }, { "epoch": 1.2201319668678927, "grad_norm": 0.4388917088508606, "learning_rate": 7.39815138484968e-06, "loss": 0.459, "step": 2897 }, { "epoch": 1.2205531377228696, "grad_norm": 0.412143349647522, "learning_rate": 7.396000484051566e-06, "loss": 0.4143, "step": 2898 }, { "epoch": 1.2209743085778464, "grad_norm": 0.4741693139076233, "learning_rate": 7.393849007540377e-06, "loss": 0.4866, "step": 2899 }, { "epoch": 1.2213954794328232, "grad_norm": 0.44627249240875244, "learning_rate": 7.391696955833066e-06, "loss": 0.4669, "step": 2900 }, { "epoch": 1.2218166502878, "grad_norm": 0.4238986670970917, "learning_rate": 7.389544329446734e-06, "loss": 0.3854, "step": 2901 }, { "epoch": 1.222237821142777, "grad_norm": 0.43229982256889343, "learning_rate": 7.3873911288986145e-06, "loss": 0.4605, "step": 2902 }, { "epoch": 1.2226589919977537, "grad_norm": 0.41471895575523376, "learning_rate": 7.38523735470608e-06, "loss": 0.4173, "step": 2903 }, { "epoch": 1.2230801628527306, "grad_norm": 0.4350877106189728, "learning_rate": 7.383083007386641e-06, "loss": 0.4733, "step": 2904 }, { "epoch": 1.2235013337077074, "grad_norm": 0.38889604806900024, "learning_rate": 7.380928087457948e-06, "loss": 0.3744, "step": 2905 }, { "epoch": 1.2239225045626843, "grad_norm": 0.39912694692611694, "learning_rate": 7.378772595437785e-06, "loss": 0.4278, "step": 2906 }, { "epoch": 1.224343675417661, "grad_norm": 0.4382197856903076, "learning_rate": 7.3766165318440765e-06, "loss": 0.4805, "step": 2907 }, { "epoch": 1.224764846272638, "grad_norm": 0.4920336902141571, "learning_rate": 7.374459897194884e-06, "loss": 0.472, "step": 2908 }, { "epoch": 1.2251860171276148, "grad_norm": 0.4201703667640686, "learning_rate": 7.3723026920084014e-06, "loss": 0.3733, "step": 2909 }, { "epoch": 1.2256071879825916, "grad_norm": 0.4066748321056366, "learning_rate": 7.370144916802969e-06, "loss": 0.4734, "step": 2910 }, { "epoch": 1.2260283588375684, "grad_norm": 0.47568073868751526, "learning_rate": 7.367986572097058e-06, "loss": 0.4661, "step": 2911 }, { "epoch": 1.2264495296925453, "grad_norm": 0.492148220539093, "learning_rate": 7.3658276584092745e-06, "loss": 0.4694, "step": 2912 }, { "epoch": 1.2268707005475221, "grad_norm": 0.4483015835285187, "learning_rate": 7.363668176258366e-06, "loss": 0.4025, "step": 2913 }, { "epoch": 1.227291871402499, "grad_norm": 0.452428936958313, "learning_rate": 7.361508126163216e-06, "loss": 0.4077, "step": 2914 }, { "epoch": 1.2277130422574758, "grad_norm": 0.43584075570106506, "learning_rate": 7.35934750864284e-06, "loss": 0.4489, "step": 2915 }, { "epoch": 1.2281342131124526, "grad_norm": 0.4559588134288788, "learning_rate": 7.357186324216396e-06, "loss": 0.446, "step": 2916 }, { "epoch": 1.2285553839674295, "grad_norm": 0.43730050325393677, "learning_rate": 7.355024573403174e-06, "loss": 0.4243, "step": 2917 }, { "epoch": 1.2289765548224063, "grad_norm": 0.46833574771881104, "learning_rate": 7.352862256722602e-06, "loss": 0.4827, "step": 2918 }, { "epoch": 1.2293977256773831, "grad_norm": 0.4283673167228699, "learning_rate": 7.350699374694244e-06, "loss": 0.3916, "step": 2919 }, { "epoch": 1.22981889653236, "grad_norm": 0.4659501314163208, "learning_rate": 7.348535927837794e-06, "loss": 0.4525, "step": 2920 }, { "epoch": 1.2302400673873368, "grad_norm": 0.5235402584075928, "learning_rate": 7.346371916673094e-06, "loss": 0.4351, "step": 2921 }, { "epoch": 1.2306612382423137, "grad_norm": 0.45068925619125366, "learning_rate": 7.344207341720111e-06, "loss": 0.4245, "step": 2922 }, { "epoch": 1.2310824090972905, "grad_norm": 0.44107165932655334, "learning_rate": 7.342042203498952e-06, "loss": 0.4261, "step": 2923 }, { "epoch": 1.2315035799522673, "grad_norm": 0.5280149579048157, "learning_rate": 7.339876502529856e-06, "loss": 0.5029, "step": 2924 }, { "epoch": 1.2319247508072442, "grad_norm": 0.41278332471847534, "learning_rate": 7.337710239333202e-06, "loss": 0.4301, "step": 2925 }, { "epoch": 1.232345921662221, "grad_norm": 0.4314921796321869, "learning_rate": 7.335543414429501e-06, "loss": 0.4836, "step": 2926 }, { "epoch": 1.2327670925171978, "grad_norm": 0.46182483434677124, "learning_rate": 7.333376028339399e-06, "loss": 0.4895, "step": 2927 }, { "epoch": 1.2331882633721747, "grad_norm": 0.4625466465950012, "learning_rate": 7.331208081583677e-06, "loss": 0.4206, "step": 2928 }, { "epoch": 1.2336094342271515, "grad_norm": 0.43464648723602295, "learning_rate": 7.329039574683254e-06, "loss": 0.4144, "step": 2929 }, { "epoch": 1.2340306050821284, "grad_norm": 0.48186782002449036, "learning_rate": 7.326870508159177e-06, "loss": 0.4878, "step": 2930 }, { "epoch": 1.2344517759371052, "grad_norm": 0.4318752586841583, "learning_rate": 7.324700882532634e-06, "loss": 0.4511, "step": 2931 }, { "epoch": 1.234872946792082, "grad_norm": 0.38774123787879944, "learning_rate": 7.322530698324941e-06, "loss": 0.3782, "step": 2932 }, { "epoch": 1.2352941176470589, "grad_norm": 0.4732346534729004, "learning_rate": 7.320359956057557e-06, "loss": 0.4559, "step": 2933 }, { "epoch": 1.2357152885020357, "grad_norm": 0.46022436022758484, "learning_rate": 7.318188656252065e-06, "loss": 0.4168, "step": 2934 }, { "epoch": 1.2361364593570126, "grad_norm": 0.39525502920150757, "learning_rate": 7.31601679943019e-06, "loss": 0.4346, "step": 2935 }, { "epoch": 1.2365576302119894, "grad_norm": 0.41910722851753235, "learning_rate": 7.3138443861137845e-06, "loss": 0.4274, "step": 2936 }, { "epoch": 1.2369788010669662, "grad_norm": 0.5121814608573914, "learning_rate": 7.311671416824842e-06, "loss": 0.5089, "step": 2937 }, { "epoch": 1.237399971921943, "grad_norm": 0.3980477750301361, "learning_rate": 7.3094978920854805e-06, "loss": 0.3906, "step": 2938 }, { "epoch": 1.23782114277692, "grad_norm": 0.4665769338607788, "learning_rate": 7.3073238124179615e-06, "loss": 0.5083, "step": 2939 }, { "epoch": 1.2382423136318967, "grad_norm": 0.4564778208732605, "learning_rate": 7.3051491783446705e-06, "loss": 0.4591, "step": 2940 }, { "epoch": 1.2386634844868736, "grad_norm": 0.4255352318286896, "learning_rate": 7.3029739903881335e-06, "loss": 0.4382, "step": 2941 }, { "epoch": 1.2390846553418504, "grad_norm": 0.45478060841560364, "learning_rate": 7.300798249071005e-06, "loss": 0.4469, "step": 2942 }, { "epoch": 1.2395058261968273, "grad_norm": 0.43715235590934753, "learning_rate": 7.298621954916074e-06, "loss": 0.4531, "step": 2943 }, { "epoch": 1.239926997051804, "grad_norm": 0.43257635831832886, "learning_rate": 7.2964451084462626e-06, "loss": 0.4056, "step": 2944 }, { "epoch": 1.240348167906781, "grad_norm": 0.486383318901062, "learning_rate": 7.294267710184626e-06, "loss": 0.4738, "step": 2945 }, { "epoch": 1.2407693387617578, "grad_norm": 0.39441704750061035, "learning_rate": 7.292089760654352e-06, "loss": 0.3888, "step": 2946 }, { "epoch": 1.2411905096167346, "grad_norm": 0.4887961149215698, "learning_rate": 7.289911260378757e-06, "loss": 0.4991, "step": 2947 }, { "epoch": 1.2416116804717114, "grad_norm": 0.44971200823783875, "learning_rate": 7.287732209881296e-06, "loss": 0.4753, "step": 2948 }, { "epoch": 1.2420328513266883, "grad_norm": 0.4536128044128418, "learning_rate": 7.285552609685551e-06, "loss": 0.4199, "step": 2949 }, { "epoch": 1.2424540221816651, "grad_norm": 0.4351164400577545, "learning_rate": 7.283372460315241e-06, "loss": 0.4663, "step": 2950 }, { "epoch": 1.242875193036642, "grad_norm": 0.4578958749771118, "learning_rate": 7.28119176229421e-06, "loss": 0.4726, "step": 2951 }, { "epoch": 1.2432963638916188, "grad_norm": 0.39759373664855957, "learning_rate": 7.279010516146443e-06, "loss": 0.4188, "step": 2952 }, { "epoch": 1.2437175347465956, "grad_norm": 0.443820595741272, "learning_rate": 7.27682872239605e-06, "loss": 0.4787, "step": 2953 }, { "epoch": 1.2441387056015722, "grad_norm": 0.4557424485683441, "learning_rate": 7.274646381567272e-06, "loss": 0.3956, "step": 2954 }, { "epoch": 1.2445598764565493, "grad_norm": 0.5321094393730164, "learning_rate": 7.272463494184485e-06, "loss": 0.5393, "step": 2955 }, { "epoch": 1.244981047311526, "grad_norm": 0.39558061957359314, "learning_rate": 7.270280060772195e-06, "loss": 0.4165, "step": 2956 }, { "epoch": 1.245402218166503, "grad_norm": 0.42815980315208435, "learning_rate": 7.26809608185504e-06, "loss": 0.4101, "step": 2957 }, { "epoch": 1.2458233890214796, "grad_norm": 0.46730557084083557, "learning_rate": 7.26591155795779e-06, "loss": 0.457, "step": 2958 }, { "epoch": 1.2462445598764567, "grad_norm": 0.4501185417175293, "learning_rate": 7.263726489605341e-06, "loss": 0.4265, "step": 2959 }, { "epoch": 1.2466657307314333, "grad_norm": 0.4368460178375244, "learning_rate": 7.261540877322723e-06, "loss": 0.4212, "step": 2960 }, { "epoch": 1.2470869015864103, "grad_norm": 0.4718775451183319, "learning_rate": 7.259354721635099e-06, "loss": 0.4583, "step": 2961 }, { "epoch": 1.247508072441387, "grad_norm": 0.39660951495170593, "learning_rate": 7.2571680230677596e-06, "loss": 0.4373, "step": 2962 }, { "epoch": 1.247929243296364, "grad_norm": 0.4896053075790405, "learning_rate": 7.254980782146128e-06, "loss": 0.4713, "step": 2963 }, { "epoch": 1.2483504141513406, "grad_norm": 0.44611257314682007, "learning_rate": 7.252792999395753e-06, "loss": 0.4294, "step": 2964 }, { "epoch": 1.2487715850063177, "grad_norm": 0.4520438015460968, "learning_rate": 7.2506046753423205e-06, "loss": 0.4639, "step": 2965 }, { "epoch": 1.2491927558612943, "grad_norm": 0.4476231634616852, "learning_rate": 7.248415810511642e-06, "loss": 0.4395, "step": 2966 }, { "epoch": 1.2496139267162711, "grad_norm": 0.49229785799980164, "learning_rate": 7.246226405429657e-06, "loss": 0.4259, "step": 2967 }, { "epoch": 1.250035097571248, "grad_norm": 0.46730032563209534, "learning_rate": 7.2440364606224415e-06, "loss": 0.4266, "step": 2968 }, { "epoch": 1.250456268426225, "grad_norm": 0.4373278021812439, "learning_rate": 7.241845976616195e-06, "loss": 0.4023, "step": 2969 }, { "epoch": 1.2508774392812017, "grad_norm": 0.4690062999725342, "learning_rate": 7.2396549539372494e-06, "loss": 0.4655, "step": 2970 }, { "epoch": 1.2512986101361787, "grad_norm": 0.4785833954811096, "learning_rate": 7.237463393112066e-06, "loss": 0.4195, "step": 2971 }, { "epoch": 1.2517197809911553, "grad_norm": 0.5267866253852844, "learning_rate": 7.235271294667234e-06, "loss": 0.56, "step": 2972 }, { "epoch": 1.2521409518461322, "grad_norm": 0.41319096088409424, "learning_rate": 7.233078659129476e-06, "loss": 0.4208, "step": 2973 }, { "epoch": 1.252562122701109, "grad_norm": 0.4689396917819977, "learning_rate": 7.230885487025635e-06, "loss": 0.4347, "step": 2974 }, { "epoch": 1.2529832935560858, "grad_norm": 0.49075597524642944, "learning_rate": 7.2286917788826926e-06, "loss": 0.4726, "step": 2975 }, { "epoch": 1.2534044644110627, "grad_norm": 0.44971439242362976, "learning_rate": 7.226497535227752e-06, "loss": 0.4408, "step": 2976 }, { "epoch": 1.2538256352660395, "grad_norm": 0.45117679238319397, "learning_rate": 7.224302756588051e-06, "loss": 0.4574, "step": 2977 }, { "epoch": 1.2542468061210164, "grad_norm": 0.43216121196746826, "learning_rate": 7.22210744349095e-06, "loss": 0.4192, "step": 2978 }, { "epoch": 1.2546679769759932, "grad_norm": 0.45675164461135864, "learning_rate": 7.219911596463942e-06, "loss": 0.4078, "step": 2979 }, { "epoch": 1.25508914783097, "grad_norm": 0.46236559748649597, "learning_rate": 7.217715216034647e-06, "loss": 0.4939, "step": 2980 }, { "epoch": 1.2555103186859469, "grad_norm": 0.44759175181388855, "learning_rate": 7.215518302730814e-06, "loss": 0.4177, "step": 2981 }, { "epoch": 1.2559314895409237, "grad_norm": 0.49515220522880554, "learning_rate": 7.213320857080318e-06, "loss": 0.4349, "step": 2982 }, { "epoch": 1.2563526603959005, "grad_norm": 0.46790632605552673, "learning_rate": 7.211122879611163e-06, "loss": 0.4299, "step": 2983 }, { "epoch": 1.2567738312508774, "grad_norm": 0.4929637014865875, "learning_rate": 7.208924370851482e-06, "loss": 0.4538, "step": 2984 }, { "epoch": 1.2571950021058542, "grad_norm": 0.4190148115158081, "learning_rate": 7.2067253313295336e-06, "loss": 0.4082, "step": 2985 }, { "epoch": 1.257616172960831, "grad_norm": 0.475617915391922, "learning_rate": 7.204525761573707e-06, "loss": 0.4974, "step": 2986 }, { "epoch": 1.258037343815808, "grad_norm": 0.47499391436576843, "learning_rate": 7.202325662112514e-06, "loss": 0.4591, "step": 2987 }, { "epoch": 1.2584585146707847, "grad_norm": 0.42143750190734863, "learning_rate": 7.200125033474599e-06, "loss": 0.4331, "step": 2988 }, { "epoch": 1.2588796855257616, "grad_norm": 0.49190977215766907, "learning_rate": 7.197923876188728e-06, "loss": 0.4553, "step": 2989 }, { "epoch": 1.2593008563807384, "grad_norm": 0.44553622603416443, "learning_rate": 7.195722190783799e-06, "loss": 0.4311, "step": 2990 }, { "epoch": 1.2597220272357152, "grad_norm": 0.41778746247291565, "learning_rate": 7.193519977788834e-06, "loss": 0.4296, "step": 2991 }, { "epoch": 1.260143198090692, "grad_norm": 0.4989106357097626, "learning_rate": 7.191317237732984e-06, "loss": 0.4588, "step": 2992 }, { "epoch": 1.260564368945669, "grad_norm": 0.47606101632118225, "learning_rate": 7.189113971145525e-06, "loss": 0.4389, "step": 2993 }, { "epoch": 1.2609855398006458, "grad_norm": 0.39859095215797424, "learning_rate": 7.186910178555857e-06, "loss": 0.455, "step": 2994 }, { "epoch": 1.2614067106556226, "grad_norm": 0.47737279534339905, "learning_rate": 7.184705860493512e-06, "loss": 0.4512, "step": 2995 }, { "epoch": 1.2618278815105994, "grad_norm": 0.4792267084121704, "learning_rate": 7.182501017488146e-06, "loss": 0.471, "step": 2996 }, { "epoch": 1.2622490523655763, "grad_norm": 0.4466821551322937, "learning_rate": 7.180295650069539e-06, "loss": 0.4428, "step": 2997 }, { "epoch": 1.2626702232205531, "grad_norm": 0.517422080039978, "learning_rate": 7.178089758767599e-06, "loss": 0.5207, "step": 2998 }, { "epoch": 1.26309139407553, "grad_norm": 0.45935559272766113, "learning_rate": 7.175883344112359e-06, "loss": 0.4434, "step": 2999 }, { "epoch": 1.2635125649305068, "grad_norm": 0.509687602519989, "learning_rate": 7.173676406633979e-06, "loss": 0.4527, "step": 3000 }, { "epoch": 1.2639337357854836, "grad_norm": 0.4728834331035614, "learning_rate": 7.171468946862744e-06, "loss": 0.4675, "step": 3001 }, { "epoch": 1.2643549066404605, "grad_norm": 0.43726295232772827, "learning_rate": 7.169260965329064e-06, "loss": 0.4291, "step": 3002 }, { "epoch": 1.2647760774954373, "grad_norm": 0.44216814637184143, "learning_rate": 7.167052462563473e-06, "loss": 0.4409, "step": 3003 }, { "epoch": 1.2651972483504141, "grad_norm": 0.46257147192955017, "learning_rate": 7.1648434390966356e-06, "loss": 0.4556, "step": 3004 }, { "epoch": 1.265618419205391, "grad_norm": 0.4230228364467621, "learning_rate": 7.162633895459336e-06, "loss": 0.4433, "step": 3005 }, { "epoch": 1.2660395900603678, "grad_norm": 0.4872640371322632, "learning_rate": 7.160423832182484e-06, "loss": 0.4594, "step": 3006 }, { "epoch": 1.2664607609153447, "grad_norm": 0.420371949672699, "learning_rate": 7.158213249797117e-06, "loss": 0.4387, "step": 3007 }, { "epoch": 1.2668819317703215, "grad_norm": 0.3905847668647766, "learning_rate": 7.1560021488343956e-06, "loss": 0.4379, "step": 3008 }, { "epoch": 1.2673031026252983, "grad_norm": 0.4622861444950104, "learning_rate": 7.153790529825604e-06, "loss": 0.4615, "step": 3009 }, { "epoch": 1.2677242734802752, "grad_norm": 0.4208938777446747, "learning_rate": 7.151578393302155e-06, "loss": 0.4413, "step": 3010 }, { "epoch": 1.268145444335252, "grad_norm": 0.4457845091819763, "learning_rate": 7.149365739795577e-06, "loss": 0.4572, "step": 3011 }, { "epoch": 1.2685666151902288, "grad_norm": 0.4430846571922302, "learning_rate": 7.1471525698375345e-06, "loss": 0.4528, "step": 3012 }, { "epoch": 1.2689877860452057, "grad_norm": 0.5046516060829163, "learning_rate": 7.144938883959807e-06, "loss": 0.4829, "step": 3013 }, { "epoch": 1.2694089569001825, "grad_norm": 0.44617924094200134, "learning_rate": 7.1427246826943e-06, "loss": 0.4018, "step": 3014 }, { "epoch": 1.2698301277551594, "grad_norm": 0.42240235209465027, "learning_rate": 7.1405099665730445e-06, "loss": 0.4187, "step": 3015 }, { "epoch": 1.2702512986101362, "grad_norm": 0.44930407404899597, "learning_rate": 7.138294736128195e-06, "loss": 0.4554, "step": 3016 }, { "epoch": 1.270672469465113, "grad_norm": 0.4403809607028961, "learning_rate": 7.136078991892029e-06, "loss": 0.4488, "step": 3017 }, { "epoch": 1.2710936403200899, "grad_norm": 0.4687497317790985, "learning_rate": 7.133862734396948e-06, "loss": 0.4453, "step": 3018 }, { "epoch": 1.2715148111750667, "grad_norm": 0.4953014850616455, "learning_rate": 7.131645964175473e-06, "loss": 0.5009, "step": 3019 }, { "epoch": 1.2719359820300435, "grad_norm": 0.4262397587299347, "learning_rate": 7.129428681760255e-06, "loss": 0.4547, "step": 3020 }, { "epoch": 1.2723571528850204, "grad_norm": 0.4066486656665802, "learning_rate": 7.127210887684064e-06, "loss": 0.4041, "step": 3021 }, { "epoch": 1.2727783237399972, "grad_norm": 0.5093741416931152, "learning_rate": 7.124992582479792e-06, "loss": 0.4528, "step": 3022 }, { "epoch": 1.273199494594974, "grad_norm": 0.508658766746521, "learning_rate": 7.122773766680456e-06, "loss": 0.4948, "step": 3023 }, { "epoch": 1.273620665449951, "grad_norm": 0.42504122853279114, "learning_rate": 7.120554440819196e-06, "loss": 0.429, "step": 3024 }, { "epoch": 1.2740418363049277, "grad_norm": 0.39520561695098877, "learning_rate": 7.118334605429272e-06, "loss": 0.4315, "step": 3025 }, { "epoch": 1.2744630071599046, "grad_norm": 0.42432695627212524, "learning_rate": 7.116114261044069e-06, "loss": 0.4674, "step": 3026 }, { "epoch": 1.2748841780148814, "grad_norm": 0.4646085798740387, "learning_rate": 7.113893408197092e-06, "loss": 0.4371, "step": 3027 }, { "epoch": 1.2753053488698582, "grad_norm": 0.4631102383136749, "learning_rate": 7.111672047421972e-06, "loss": 0.4756, "step": 3028 }, { "epoch": 1.275726519724835, "grad_norm": 0.4450991153717041, "learning_rate": 7.109450179252457e-06, "loss": 0.4756, "step": 3029 }, { "epoch": 1.276147690579812, "grad_norm": 0.41978952288627625, "learning_rate": 7.107227804222421e-06, "loss": 0.4762, "step": 3030 }, { "epoch": 1.2765688614347888, "grad_norm": 0.45770588517189026, "learning_rate": 7.105004922865857e-06, "loss": 0.4262, "step": 3031 }, { "epoch": 1.2769900322897656, "grad_norm": 0.4178577661514282, "learning_rate": 7.102781535716881e-06, "loss": 0.4121, "step": 3032 }, { "epoch": 1.2774112031447424, "grad_norm": 0.47282519936561584, "learning_rate": 7.100557643309732e-06, "loss": 0.4784, "step": 3033 }, { "epoch": 1.2778323739997193, "grad_norm": 0.47132325172424316, "learning_rate": 7.098333246178767e-06, "loss": 0.4103, "step": 3034 }, { "epoch": 1.278253544854696, "grad_norm": 0.46157196164131165, "learning_rate": 7.096108344858466e-06, "loss": 0.4099, "step": 3035 }, { "epoch": 1.278674715709673, "grad_norm": 0.5542453527450562, "learning_rate": 7.093882939883431e-06, "loss": 0.4905, "step": 3036 }, { "epoch": 1.2790958865646498, "grad_norm": 0.42003434896469116, "learning_rate": 7.0916570317883834e-06, "loss": 0.4303, "step": 3037 }, { "epoch": 1.2795170574196266, "grad_norm": 0.39064735174179077, "learning_rate": 7.089430621108169e-06, "loss": 0.4193, "step": 3038 }, { "epoch": 1.2799382282746035, "grad_norm": 0.4432649612426758, "learning_rate": 7.087203708377748e-06, "loss": 0.4618, "step": 3039 }, { "epoch": 1.2803593991295803, "grad_norm": 0.4508145749568939, "learning_rate": 7.084976294132209e-06, "loss": 0.4504, "step": 3040 }, { "epoch": 1.2807805699845571, "grad_norm": 0.41423484683036804, "learning_rate": 7.082748378906752e-06, "loss": 0.4015, "step": 3041 }, { "epoch": 1.281201740839534, "grad_norm": 0.4359106123447418, "learning_rate": 7.080519963236706e-06, "loss": 0.4346, "step": 3042 }, { "epoch": 1.2816229116945108, "grad_norm": 0.4922808110713959, "learning_rate": 7.078291047657516e-06, "loss": 0.487, "step": 3043 }, { "epoch": 1.2820440825494877, "grad_norm": 0.4471505284309387, "learning_rate": 7.076061632704747e-06, "loss": 0.4648, "step": 3044 }, { "epoch": 1.2824652534044645, "grad_norm": 0.4216354787349701, "learning_rate": 7.073831718914087e-06, "loss": 0.4509, "step": 3045 }, { "epoch": 1.2828864242594413, "grad_norm": 0.4430099427700043, "learning_rate": 7.071601306821337e-06, "loss": 0.4197, "step": 3046 }, { "epoch": 1.2833075951144182, "grad_norm": 0.4190264642238617, "learning_rate": 7.069370396962427e-06, "loss": 0.4026, "step": 3047 }, { "epoch": 1.283728765969395, "grad_norm": 0.46140313148498535, "learning_rate": 7.0671389898734014e-06, "loss": 0.4565, "step": 3048 }, { "epoch": 1.2841499368243716, "grad_norm": 0.433490127325058, "learning_rate": 7.064907086090422e-06, "loss": 0.4604, "step": 3049 }, { "epoch": 1.2845711076793487, "grad_norm": 0.4221280515193939, "learning_rate": 7.062674686149774e-06, "loss": 0.413, "step": 3050 }, { "epoch": 1.2849922785343253, "grad_norm": 0.4656447172164917, "learning_rate": 7.060441790587861e-06, "loss": 0.4236, "step": 3051 }, { "epoch": 1.2854134493893024, "grad_norm": 0.49439993500709534, "learning_rate": 7.0582083999412045e-06, "loss": 0.4934, "step": 3052 }, { "epoch": 1.285834620244279, "grad_norm": 0.4735461175441742, "learning_rate": 7.055974514746446e-06, "loss": 0.4438, "step": 3053 }, { "epoch": 1.286255791099256, "grad_norm": 0.47564971446990967, "learning_rate": 7.053740135540344e-06, "loss": 0.4608, "step": 3054 }, { "epoch": 1.2866769619542326, "grad_norm": 0.43195921182632446, "learning_rate": 7.0515052628597765e-06, "loss": 0.4102, "step": 3055 }, { "epoch": 1.2870981328092097, "grad_norm": 0.4469795525074005, "learning_rate": 7.049269897241744e-06, "loss": 0.4496, "step": 3056 }, { "epoch": 1.2875193036641863, "grad_norm": 0.4716815650463104, "learning_rate": 7.047034039223362e-06, "loss": 0.469, "step": 3057 }, { "epoch": 1.2879404745191634, "grad_norm": 0.4941425919532776, "learning_rate": 7.044797689341859e-06, "loss": 0.4624, "step": 3058 }, { "epoch": 1.28836164537414, "grad_norm": 0.48631641268730164, "learning_rate": 7.042560848134592e-06, "loss": 0.4652, "step": 3059 }, { "epoch": 1.288782816229117, "grad_norm": 0.40235239267349243, "learning_rate": 7.0403235161390315e-06, "loss": 0.4173, "step": 3060 }, { "epoch": 1.2892039870840937, "grad_norm": 0.38696932792663574, "learning_rate": 7.0380856938927635e-06, "loss": 0.4129, "step": 3061 }, { "epoch": 1.2896251579390707, "grad_norm": 0.49747511744499207, "learning_rate": 7.035847381933494e-06, "loss": 0.4643, "step": 3062 }, { "epoch": 1.2900463287940473, "grad_norm": 0.38865211606025696, "learning_rate": 7.033608580799045e-06, "loss": 0.371, "step": 3063 }, { "epoch": 1.2904674996490244, "grad_norm": 0.4713485836982727, "learning_rate": 7.0313692910273615e-06, "loss": 0.4845, "step": 3064 }, { "epoch": 1.290888670504001, "grad_norm": 0.41814908385276794, "learning_rate": 7.0291295131565e-06, "loss": 0.3897, "step": 3065 }, { "epoch": 1.291309841358978, "grad_norm": 0.39720726013183594, "learning_rate": 7.026889247724635e-06, "loss": 0.3995, "step": 3066 }, { "epoch": 1.2917310122139547, "grad_norm": 0.4736206531524658, "learning_rate": 7.024648495270064e-06, "loss": 0.4749, "step": 3067 }, { "epoch": 1.2921521830689318, "grad_norm": 0.49436894059181213, "learning_rate": 7.02240725633119e-06, "loss": 0.4376, "step": 3068 }, { "epoch": 1.2925733539239084, "grad_norm": 0.5494786500930786, "learning_rate": 7.0201655314465445e-06, "loss": 0.5024, "step": 3069 }, { "epoch": 1.2929945247788854, "grad_norm": 0.45837274193763733, "learning_rate": 7.017923321154769e-06, "loss": 0.4421, "step": 3070 }, { "epoch": 1.293415695633862, "grad_norm": 0.44151613116264343, "learning_rate": 7.015680625994627e-06, "loss": 0.4446, "step": 3071 }, { "epoch": 1.293836866488839, "grad_norm": 0.4163573980331421, "learning_rate": 7.013437446504993e-06, "loss": 0.4596, "step": 3072 }, { "epoch": 1.2942580373438157, "grad_norm": 0.43143734335899353, "learning_rate": 7.0111937832248575e-06, "loss": 0.4817, "step": 3073 }, { "epoch": 1.2946792081987926, "grad_norm": 0.3934636414051056, "learning_rate": 7.008949636693334e-06, "loss": 0.4055, "step": 3074 }, { "epoch": 1.2951003790537694, "grad_norm": 0.4197433590888977, "learning_rate": 7.006705007449647e-06, "loss": 0.4209, "step": 3075 }, { "epoch": 1.2955215499087462, "grad_norm": 0.41351115703582764, "learning_rate": 7.004459896033137e-06, "loss": 0.4027, "step": 3076 }, { "epoch": 1.295942720763723, "grad_norm": 0.4514370858669281, "learning_rate": 7.002214302983262e-06, "loss": 0.5069, "step": 3077 }, { "epoch": 1.2963638916187, "grad_norm": 0.4371631145477295, "learning_rate": 6.9999682288395946e-06, "loss": 0.4497, "step": 3078 }, { "epoch": 1.2967850624736768, "grad_norm": 0.47010159492492676, "learning_rate": 6.997721674141823e-06, "loss": 0.4919, "step": 3079 }, { "epoch": 1.2972062333286536, "grad_norm": 0.4088192284107208, "learning_rate": 6.995474639429753e-06, "loss": 0.3599, "step": 3080 }, { "epoch": 1.2976274041836304, "grad_norm": 0.4828518033027649, "learning_rate": 6.993227125243304e-06, "loss": 0.4591, "step": 3081 }, { "epoch": 1.2980485750386073, "grad_norm": 0.45551925897598267, "learning_rate": 6.990979132122507e-06, "loss": 0.4608, "step": 3082 }, { "epoch": 1.298469745893584, "grad_norm": 0.45640143752098083, "learning_rate": 6.988730660607517e-06, "loss": 0.4498, "step": 3083 }, { "epoch": 1.298890916748561, "grad_norm": 0.45294371247291565, "learning_rate": 6.986481711238594e-06, "loss": 0.4226, "step": 3084 }, { "epoch": 1.2993120876035378, "grad_norm": 0.4430621564388275, "learning_rate": 6.984232284556122e-06, "loss": 0.4596, "step": 3085 }, { "epoch": 1.2997332584585146, "grad_norm": 0.4734133780002594, "learning_rate": 6.981982381100591e-06, "loss": 0.4725, "step": 3086 }, { "epoch": 1.3001544293134915, "grad_norm": 0.4136230945587158, "learning_rate": 6.979732001412612e-06, "loss": 0.4097, "step": 3087 }, { "epoch": 1.3005756001684683, "grad_norm": 0.44299718737602234, "learning_rate": 6.977481146032907e-06, "loss": 0.4919, "step": 3088 }, { "epoch": 1.3009967710234451, "grad_norm": 0.38920900225639343, "learning_rate": 6.975229815502316e-06, "loss": 0.3842, "step": 3089 }, { "epoch": 1.301417941878422, "grad_norm": 0.4965825378894806, "learning_rate": 6.972978010361787e-06, "loss": 0.4678, "step": 3090 }, { "epoch": 1.3018391127333988, "grad_norm": 0.4684782028198242, "learning_rate": 6.970725731152389e-06, "loss": 0.4717, "step": 3091 }, { "epoch": 1.3022602835883756, "grad_norm": 0.43264466524124146, "learning_rate": 6.9684729784153006e-06, "loss": 0.4235, "step": 3092 }, { "epoch": 1.3026814544433525, "grad_norm": 0.4460088014602661, "learning_rate": 6.966219752691814e-06, "loss": 0.4103, "step": 3093 }, { "epoch": 1.3031026252983293, "grad_norm": 0.4573230445384979, "learning_rate": 6.963966054523336e-06, "loss": 0.4314, "step": 3094 }, { "epoch": 1.3035237961533062, "grad_norm": 0.40673553943634033, "learning_rate": 6.96171188445139e-06, "loss": 0.4259, "step": 3095 }, { "epoch": 1.303944967008283, "grad_norm": 0.42922064661979675, "learning_rate": 6.959457243017607e-06, "loss": 0.4323, "step": 3096 }, { "epoch": 1.3043661378632598, "grad_norm": 0.4816132187843323, "learning_rate": 6.957202130763736e-06, "loss": 0.4756, "step": 3097 }, { "epoch": 1.3047873087182367, "grad_norm": 0.4755643904209137, "learning_rate": 6.954946548231637e-06, "loss": 0.4361, "step": 3098 }, { "epoch": 1.3052084795732135, "grad_norm": 0.42543894052505493, "learning_rate": 6.952690495963281e-06, "loss": 0.3802, "step": 3099 }, { "epoch": 1.3056296504281903, "grad_norm": 0.4613229036331177, "learning_rate": 6.9504339745007584e-06, "loss": 0.4887, "step": 3100 }, { "epoch": 1.3060508212831672, "grad_norm": 0.4549981355667114, "learning_rate": 6.948176984386266e-06, "loss": 0.4315, "step": 3101 }, { "epoch": 1.306471992138144, "grad_norm": 0.4293559193611145, "learning_rate": 6.945919526162115e-06, "loss": 0.4163, "step": 3102 }, { "epoch": 1.3068931629931209, "grad_norm": 0.4353148937225342, "learning_rate": 6.943661600370728e-06, "loss": 0.4857, "step": 3103 }, { "epoch": 1.3073143338480977, "grad_norm": 0.4799044132232666, "learning_rate": 6.941403207554645e-06, "loss": 0.4761, "step": 3104 }, { "epoch": 1.3077355047030745, "grad_norm": 0.3946090340614319, "learning_rate": 6.939144348256511e-06, "loss": 0.4011, "step": 3105 }, { "epoch": 1.3081566755580514, "grad_norm": 0.44898825883865356, "learning_rate": 6.936885023019089e-06, "loss": 0.4768, "step": 3106 }, { "epoch": 1.3085778464130282, "grad_norm": 0.4166807532310486, "learning_rate": 6.934625232385252e-06, "loss": 0.4059, "step": 3107 }, { "epoch": 1.308999017268005, "grad_norm": 0.4292454421520233, "learning_rate": 6.932364976897983e-06, "loss": 0.4514, "step": 3108 }, { "epoch": 1.3094201881229819, "grad_norm": 0.4028705060482025, "learning_rate": 6.930104257100378e-06, "loss": 0.3877, "step": 3109 }, { "epoch": 1.3098413589779587, "grad_norm": 0.4305552840232849, "learning_rate": 6.927843073535645e-06, "loss": 0.4277, "step": 3110 }, { "epoch": 1.3102625298329356, "grad_norm": 0.4597199261188507, "learning_rate": 6.925581426747105e-06, "loss": 0.4306, "step": 3111 }, { "epoch": 1.3106837006879124, "grad_norm": 0.4217122495174408, "learning_rate": 6.923319317278185e-06, "loss": 0.4226, "step": 3112 }, { "epoch": 1.3111048715428892, "grad_norm": 0.4233667254447937, "learning_rate": 6.92105674567243e-06, "loss": 0.4554, "step": 3113 }, { "epoch": 1.311526042397866, "grad_norm": 0.41771242022514343, "learning_rate": 6.91879371247349e-06, "loss": 0.3907, "step": 3114 }, { "epoch": 1.311947213252843, "grad_norm": 0.47920408844947815, "learning_rate": 6.916530218225131e-06, "loss": 0.4704, "step": 3115 }, { "epoch": 1.3123683841078198, "grad_norm": 0.420915812253952, "learning_rate": 6.914266263471226e-06, "loss": 0.412, "step": 3116 }, { "epoch": 1.3127895549627966, "grad_norm": 0.47145596146583557, "learning_rate": 6.912001848755759e-06, "loss": 0.5053, "step": 3117 }, { "epoch": 1.3132107258177734, "grad_norm": 0.41709890961647034, "learning_rate": 6.909736974622827e-06, "loss": 0.4326, "step": 3118 }, { "epoch": 1.3136318966727503, "grad_norm": 0.4159294664859772, "learning_rate": 6.907471641616638e-06, "loss": 0.4107, "step": 3119 }, { "epoch": 1.314053067527727, "grad_norm": 0.4977268576622009, "learning_rate": 6.905205850281502e-06, "loss": 0.4693, "step": 3120 }, { "epoch": 1.314474238382704, "grad_norm": 0.5102273225784302, "learning_rate": 6.902939601161852e-06, "loss": 0.4588, "step": 3121 }, { "epoch": 1.3148954092376808, "grad_norm": 0.44039586186408997, "learning_rate": 6.900672894802221e-06, "loss": 0.4451, "step": 3122 }, { "epoch": 1.3153165800926576, "grad_norm": 0.4578932523727417, "learning_rate": 6.898405731747254e-06, "loss": 0.4256, "step": 3123 }, { "epoch": 1.3157377509476345, "grad_norm": 0.5331529974937439, "learning_rate": 6.896138112541711e-06, "loss": 0.484, "step": 3124 }, { "epoch": 1.3161589218026113, "grad_norm": 0.41031524538993835, "learning_rate": 6.893870037730451e-06, "loss": 0.359, "step": 3125 }, { "epoch": 1.3165800926575881, "grad_norm": 0.468963086605072, "learning_rate": 6.891601507858457e-06, "loss": 0.5228, "step": 3126 }, { "epoch": 1.317001263512565, "grad_norm": 0.45556873083114624, "learning_rate": 6.889332523470808e-06, "loss": 0.4233, "step": 3127 }, { "epoch": 1.3174224343675418, "grad_norm": 0.48043856024742126, "learning_rate": 6.887063085112699e-06, "loss": 0.4912, "step": 3128 }, { "epoch": 1.3178436052225186, "grad_norm": 0.43815457820892334, "learning_rate": 6.884793193329431e-06, "loss": 0.4245, "step": 3129 }, { "epoch": 1.3182647760774955, "grad_norm": 0.45176684856414795, "learning_rate": 6.882522848666419e-06, "loss": 0.4843, "step": 3130 }, { "epoch": 1.3186859469324723, "grad_norm": 0.4345851242542267, "learning_rate": 6.88025205166918e-06, "loss": 0.3985, "step": 3131 }, { "epoch": 1.3191071177874492, "grad_norm": 0.5095771551132202, "learning_rate": 6.877980802883346e-06, "loss": 0.4506, "step": 3132 }, { "epoch": 1.319528288642426, "grad_norm": 0.42747238278388977, "learning_rate": 6.875709102854651e-06, "loss": 0.4532, "step": 3133 }, { "epoch": 1.3199494594974028, "grad_norm": 0.41621798276901245, "learning_rate": 6.873436952128946e-06, "loss": 0.4573, "step": 3134 }, { "epoch": 1.3203706303523797, "grad_norm": 0.5226338505744934, "learning_rate": 6.871164351252181e-06, "loss": 0.4398, "step": 3135 }, { "epoch": 1.3207918012073565, "grad_norm": 0.5030977725982666, "learning_rate": 6.8688913007704194e-06, "loss": 0.5055, "step": 3136 }, { "epoch": 1.3212129720623333, "grad_norm": 0.40802672505378723, "learning_rate": 6.866617801229833e-06, "loss": 0.4394, "step": 3137 }, { "epoch": 1.3216341429173102, "grad_norm": 0.48321792483329773, "learning_rate": 6.864343853176701e-06, "loss": 0.5009, "step": 3138 }, { "epoch": 1.322055313772287, "grad_norm": 0.4410761892795563, "learning_rate": 6.862069457157408e-06, "loss": 0.3736, "step": 3139 }, { "epoch": 1.3224764846272639, "grad_norm": 0.4649171531200409, "learning_rate": 6.859794613718447e-06, "loss": 0.4988, "step": 3140 }, { "epoch": 1.3228976554822407, "grad_norm": 0.4267072379589081, "learning_rate": 6.85751932340642e-06, "loss": 0.384, "step": 3141 }, { "epoch": 1.3233188263372175, "grad_norm": 0.5015639662742615, "learning_rate": 6.855243586768038e-06, "loss": 0.4642, "step": 3142 }, { "epoch": 1.3237399971921944, "grad_norm": 0.5648355484008789, "learning_rate": 6.852967404350114e-06, "loss": 0.4822, "step": 3143 }, { "epoch": 1.3241611680471712, "grad_norm": 0.46424391865730286, "learning_rate": 6.850690776699574e-06, "loss": 0.4356, "step": 3144 }, { "epoch": 1.324582338902148, "grad_norm": 0.4701475203037262, "learning_rate": 6.848413704363443e-06, "loss": 0.476, "step": 3145 }, { "epoch": 1.3250035097571249, "grad_norm": 0.3918053209781647, "learning_rate": 6.846136187888863e-06, "loss": 0.356, "step": 3146 }, { "epoch": 1.3254246806121017, "grad_norm": 0.5112391114234924, "learning_rate": 6.843858227823077e-06, "loss": 0.5112, "step": 3147 }, { "epoch": 1.3258458514670783, "grad_norm": 0.5029734373092651, "learning_rate": 6.841579824713433e-06, "loss": 0.4532, "step": 3148 }, { "epoch": 1.3262670223220554, "grad_norm": 0.47453010082244873, "learning_rate": 6.8393009791073895e-06, "loss": 0.4319, "step": 3149 }, { "epoch": 1.326688193177032, "grad_norm": 0.40806254744529724, "learning_rate": 6.837021691552508e-06, "loss": 0.3757, "step": 3150 }, { "epoch": 1.327109364032009, "grad_norm": 0.4641188383102417, "learning_rate": 6.834741962596458e-06, "loss": 0.463, "step": 3151 }, { "epoch": 1.3275305348869857, "grad_norm": 0.45826807618141174, "learning_rate": 6.832461792787017e-06, "loss": 0.4414, "step": 3152 }, { "epoch": 1.3279517057419628, "grad_norm": 0.4346387982368469, "learning_rate": 6.830181182672063e-06, "loss": 0.3998, "step": 3153 }, { "epoch": 1.3283728765969394, "grad_norm": 0.5131127238273621, "learning_rate": 6.827900132799587e-06, "loss": 0.5175, "step": 3154 }, { "epoch": 1.3287940474519164, "grad_norm": 0.39268678426742554, "learning_rate": 6.825618643717676e-06, "loss": 0.3973, "step": 3155 }, { "epoch": 1.329215218306893, "grad_norm": 0.4343218207359314, "learning_rate": 6.8233367159745324e-06, "loss": 0.413, "step": 3156 }, { "epoch": 1.32963638916187, "grad_norm": 0.42946314811706543, "learning_rate": 6.821054350118459e-06, "loss": 0.4032, "step": 3157 }, { "epoch": 1.3300575600168467, "grad_norm": 0.4568077325820923, "learning_rate": 6.818771546697863e-06, "loss": 0.4756, "step": 3158 }, { "epoch": 1.3304787308718238, "grad_norm": 0.4386969208717346, "learning_rate": 6.816488306261262e-06, "loss": 0.4337, "step": 3159 }, { "epoch": 1.3308999017268004, "grad_norm": 0.44139519333839417, "learning_rate": 6.8142046293572705e-06, "loss": 0.4661, "step": 3160 }, { "epoch": 1.3313210725817775, "grad_norm": 0.46564221382141113, "learning_rate": 6.811920516534616e-06, "loss": 0.4693, "step": 3161 }, { "epoch": 1.331742243436754, "grad_norm": 0.4503885805606842, "learning_rate": 6.809635968342124e-06, "loss": 0.4336, "step": 3162 }, { "epoch": 1.3321634142917311, "grad_norm": 0.47954338788986206, "learning_rate": 6.807350985328732e-06, "loss": 0.468, "step": 3163 }, { "epoch": 1.3325845851467077, "grad_norm": 0.4313298165798187, "learning_rate": 6.805065568043472e-06, "loss": 0.4772, "step": 3164 }, { "epoch": 1.3330057560016848, "grad_norm": 0.4427090287208557, "learning_rate": 6.80277971703549e-06, "loss": 0.4077, "step": 3165 }, { "epoch": 1.3334269268566614, "grad_norm": 0.5396823287010193, "learning_rate": 6.800493432854033e-06, "loss": 0.5052, "step": 3166 }, { "epoch": 1.3338480977116385, "grad_norm": 0.43530553579330444, "learning_rate": 6.798206716048449e-06, "loss": 0.406, "step": 3167 }, { "epoch": 1.334269268566615, "grad_norm": 0.43564826250076294, "learning_rate": 6.795919567168192e-06, "loss": 0.4142, "step": 3168 }, { "epoch": 1.3346904394215922, "grad_norm": 0.4582655429840088, "learning_rate": 6.793631986762822e-06, "loss": 0.4273, "step": 3169 }, { "epoch": 1.3351116102765688, "grad_norm": 0.45236513018608093, "learning_rate": 6.791343975382e-06, "loss": 0.4466, "step": 3170 }, { "epoch": 1.3355327811315458, "grad_norm": 0.4474009573459625, "learning_rate": 6.7890555335754914e-06, "loss": 0.4499, "step": 3171 }, { "epoch": 1.3359539519865224, "grad_norm": 0.46835657954216003, "learning_rate": 6.786766661893165e-06, "loss": 0.4148, "step": 3172 }, { "epoch": 1.3363751228414993, "grad_norm": 0.46588581800460815, "learning_rate": 6.784477360884991e-06, "loss": 0.4434, "step": 3173 }, { "epoch": 1.3367962936964761, "grad_norm": 0.43300044536590576, "learning_rate": 6.782187631101049e-06, "loss": 0.4483, "step": 3174 }, { "epoch": 1.337217464551453, "grad_norm": 0.43045616149902344, "learning_rate": 6.779897473091513e-06, "loss": 0.4083, "step": 3175 }, { "epoch": 1.3376386354064298, "grad_norm": 0.4424055516719818, "learning_rate": 6.777606887406667e-06, "loss": 0.4163, "step": 3176 }, { "epoch": 1.3380598062614066, "grad_norm": 0.5168639421463013, "learning_rate": 6.775315874596893e-06, "loss": 0.4679, "step": 3177 }, { "epoch": 1.3384809771163835, "grad_norm": 0.4315212368965149, "learning_rate": 6.773024435212678e-06, "loss": 0.4616, "step": 3178 }, { "epoch": 1.3389021479713603, "grad_norm": 0.430290549993515, "learning_rate": 6.770732569804614e-06, "loss": 0.4194, "step": 3179 }, { "epoch": 1.3393233188263372, "grad_norm": 0.46252885460853577, "learning_rate": 6.768440278923387e-06, "loss": 0.4755, "step": 3180 }, { "epoch": 1.339744489681314, "grad_norm": 0.43347474932670593, "learning_rate": 6.7661475631197935e-06, "loss": 0.3675, "step": 3181 }, { "epoch": 1.3401656605362908, "grad_norm": 0.47808176279067993, "learning_rate": 6.76385442294473e-06, "loss": 0.4646, "step": 3182 }, { "epoch": 1.3405868313912677, "grad_norm": 0.47566285729408264, "learning_rate": 6.7615608589491935e-06, "loss": 0.4624, "step": 3183 }, { "epoch": 1.3410080022462445, "grad_norm": 0.4405844211578369, "learning_rate": 6.759266871684283e-06, "loss": 0.4319, "step": 3184 }, { "epoch": 1.3414291731012213, "grad_norm": 0.46028536558151245, "learning_rate": 6.7569724617012e-06, "loss": 0.4745, "step": 3185 }, { "epoch": 1.3418503439561982, "grad_norm": 0.4549706280231476, "learning_rate": 6.754677629551248e-06, "loss": 0.468, "step": 3186 }, { "epoch": 1.342271514811175, "grad_norm": 0.42234060168266296, "learning_rate": 6.752382375785831e-06, "loss": 0.4419, "step": 3187 }, { "epoch": 1.3426926856661519, "grad_norm": 0.46175631880760193, "learning_rate": 6.750086700956454e-06, "loss": 0.4668, "step": 3188 }, { "epoch": 1.3431138565211287, "grad_norm": 0.44460323452949524, "learning_rate": 6.747790605614724e-06, "loss": 0.4545, "step": 3189 }, { "epoch": 1.3435350273761055, "grad_norm": 0.4551349878311157, "learning_rate": 6.745494090312348e-06, "loss": 0.4182, "step": 3190 }, { "epoch": 1.3439561982310824, "grad_norm": 0.3986932039260864, "learning_rate": 6.743197155601138e-06, "loss": 0.3761, "step": 3191 }, { "epoch": 1.3443773690860592, "grad_norm": 0.49341753125190735, "learning_rate": 6.740899802032999e-06, "loss": 0.4798, "step": 3192 }, { "epoch": 1.344798539941036, "grad_norm": 0.4248417913913727, "learning_rate": 6.738602030159942e-06, "loss": 0.4583, "step": 3193 }, { "epoch": 1.3452197107960129, "grad_norm": 0.3868306577205658, "learning_rate": 6.736303840534082e-06, "loss": 0.3912, "step": 3194 }, { "epoch": 1.3456408816509897, "grad_norm": 0.5609158873558044, "learning_rate": 6.734005233707624e-06, "loss": 0.5349, "step": 3195 }, { "epoch": 1.3460620525059666, "grad_norm": 0.40787023305892944, "learning_rate": 6.731706210232884e-06, "loss": 0.3999, "step": 3196 }, { "epoch": 1.3464832233609434, "grad_norm": 0.46547821164131165, "learning_rate": 6.72940677066227e-06, "loss": 0.4377, "step": 3197 }, { "epoch": 1.3469043942159202, "grad_norm": 0.4755505323410034, "learning_rate": 6.727106915548295e-06, "loss": 0.457, "step": 3198 }, { "epoch": 1.347325565070897, "grad_norm": 0.5219821929931641, "learning_rate": 6.7248066454435715e-06, "loss": 0.5094, "step": 3199 }, { "epoch": 1.347746735925874, "grad_norm": 0.4654194414615631, "learning_rate": 6.722505960900805e-06, "loss": 0.4307, "step": 3200 }, { "epoch": 1.3481679067808507, "grad_norm": 0.4989486634731293, "learning_rate": 6.7202048624728135e-06, "loss": 0.4436, "step": 3201 }, { "epoch": 1.3485890776358276, "grad_norm": 0.47092047333717346, "learning_rate": 6.717903350712501e-06, "loss": 0.4739, "step": 3202 }, { "epoch": 1.3490102484908044, "grad_norm": 0.40017572045326233, "learning_rate": 6.71560142617288e-06, "loss": 0.3821, "step": 3203 }, { "epoch": 1.3494314193457813, "grad_norm": 0.5039219856262207, "learning_rate": 6.713299089407057e-06, "loss": 0.4556, "step": 3204 }, { "epoch": 1.349852590200758, "grad_norm": 0.42897218465805054, "learning_rate": 6.710996340968241e-06, "loss": 0.4489, "step": 3205 }, { "epoch": 1.350273761055735, "grad_norm": 0.4807592034339905, "learning_rate": 6.708693181409739e-06, "loss": 0.3815, "step": 3206 }, { "epoch": 1.3506949319107118, "grad_norm": 0.4323490858078003, "learning_rate": 6.706389611284953e-06, "loss": 0.429, "step": 3207 }, { "epoch": 1.3511161027656886, "grad_norm": 0.43650516867637634, "learning_rate": 6.704085631147389e-06, "loss": 0.4268, "step": 3208 }, { "epoch": 1.3515372736206654, "grad_norm": 0.43431925773620605, "learning_rate": 6.701781241550649e-06, "loss": 0.4251, "step": 3209 }, { "epoch": 1.3519584444756423, "grad_norm": 0.4088340997695923, "learning_rate": 6.699476443048435e-06, "loss": 0.4482, "step": 3210 }, { "epoch": 1.3523796153306191, "grad_norm": 0.4214085340499878, "learning_rate": 6.697171236194544e-06, "loss": 0.4, "step": 3211 }, { "epoch": 1.352800786185596, "grad_norm": 0.4492333233356476, "learning_rate": 6.694865621542873e-06, "loss": 0.4689, "step": 3212 }, { "epoch": 1.3532219570405728, "grad_norm": 0.3907420039176941, "learning_rate": 6.69255959964742e-06, "loss": 0.4686, "step": 3213 }, { "epoch": 1.3536431278955496, "grad_norm": 0.39095500111579895, "learning_rate": 6.690253171062275e-06, "loss": 0.3716, "step": 3214 }, { "epoch": 1.3540642987505265, "grad_norm": 0.5293211340904236, "learning_rate": 6.68794633634163e-06, "loss": 0.5451, "step": 3215 }, { "epoch": 1.3544854696055033, "grad_norm": 0.38878220319747925, "learning_rate": 6.685639096039772e-06, "loss": 0.429, "step": 3216 }, { "epoch": 1.3549066404604801, "grad_norm": 0.4599126875400543, "learning_rate": 6.683331450711088e-06, "loss": 0.3727, "step": 3217 }, { "epoch": 1.355327811315457, "grad_norm": 0.4884486794471741, "learning_rate": 6.681023400910061e-06, "loss": 0.507, "step": 3218 }, { "epoch": 1.3557489821704338, "grad_norm": 0.45842915773391724, "learning_rate": 6.6787149471912704e-06, "loss": 0.4713, "step": 3219 }, { "epoch": 1.3561701530254107, "grad_norm": 0.39287304878234863, "learning_rate": 6.676406090109393e-06, "loss": 0.408, "step": 3220 }, { "epoch": 1.3565913238803875, "grad_norm": 0.4810461401939392, "learning_rate": 6.674096830219205e-06, "loss": 0.4797, "step": 3221 }, { "epoch": 1.3570124947353643, "grad_norm": 0.4151228666305542, "learning_rate": 6.671787168075577e-06, "loss": 0.4031, "step": 3222 }, { "epoch": 1.3574336655903412, "grad_norm": 0.45300453901290894, "learning_rate": 6.669477104233474e-06, "loss": 0.4402, "step": 3223 }, { "epoch": 1.357854836445318, "grad_norm": 0.4860920011997223, "learning_rate": 6.667166639247963e-06, "loss": 0.4412, "step": 3224 }, { "epoch": 1.3582760073002949, "grad_norm": 0.4495905935764313, "learning_rate": 6.664855773674203e-06, "loss": 0.4444, "step": 3225 }, { "epoch": 1.3586971781552717, "grad_norm": 0.43711596727371216, "learning_rate": 6.662544508067452e-06, "loss": 0.4293, "step": 3226 }, { "epoch": 1.3591183490102485, "grad_norm": 0.4950462877750397, "learning_rate": 6.660232842983063e-06, "loss": 0.4612, "step": 3227 }, { "epoch": 1.3595395198652254, "grad_norm": 0.45614093542099, "learning_rate": 6.657920778976483e-06, "loss": 0.4801, "step": 3228 }, { "epoch": 1.3599606907202022, "grad_norm": 0.4275091886520386, "learning_rate": 6.655608316603257e-06, "loss": 0.4259, "step": 3229 }, { "epoch": 1.360381861575179, "grad_norm": 0.4167743921279907, "learning_rate": 6.653295456419028e-06, "loss": 0.4219, "step": 3230 }, { "epoch": 1.3608030324301559, "grad_norm": 0.44808429479599, "learning_rate": 6.650982198979531e-06, "loss": 0.4764, "step": 3231 }, { "epoch": 1.3612242032851327, "grad_norm": 0.4172740876674652, "learning_rate": 6.6486685448405944e-06, "loss": 0.4658, "step": 3232 }, { "epoch": 1.3616453741401096, "grad_norm": 0.47722551226615906, "learning_rate": 6.64635449455815e-06, "loss": 0.4449, "step": 3233 }, { "epoch": 1.3620665449950864, "grad_norm": 0.4910096824169159, "learning_rate": 6.644040048688215e-06, "loss": 0.4904, "step": 3234 }, { "epoch": 1.3624877158500632, "grad_norm": 0.48068365454673767, "learning_rate": 6.64172520778691e-06, "loss": 0.4891, "step": 3235 }, { "epoch": 1.36290888670504, "grad_norm": 0.38656145334243774, "learning_rate": 6.639409972410446e-06, "loss": 0.4291, "step": 3236 }, { "epoch": 1.363330057560017, "grad_norm": 0.4861101806163788, "learning_rate": 6.6370943431151294e-06, "loss": 0.467, "step": 3237 }, { "epoch": 1.3637512284149937, "grad_norm": 0.46581777930259705, "learning_rate": 6.634778320457363e-06, "loss": 0.4202, "step": 3238 }, { "epoch": 1.3641723992699706, "grad_norm": 0.4598459005355835, "learning_rate": 6.632461904993639e-06, "loss": 0.4359, "step": 3239 }, { "epoch": 1.3645935701249474, "grad_norm": 0.38766005635261536, "learning_rate": 6.6301450972805516e-06, "loss": 0.3756, "step": 3240 }, { "epoch": 1.3650147409799243, "grad_norm": 0.4392856955528259, "learning_rate": 6.627827897874784e-06, "loss": 0.4759, "step": 3241 }, { "epoch": 1.365435911834901, "grad_norm": 0.42546331882476807, "learning_rate": 6.625510307333115e-06, "loss": 0.4168, "step": 3242 }, { "epoch": 1.365857082689878, "grad_norm": 0.461894154548645, "learning_rate": 6.623192326212416e-06, "loss": 0.4633, "step": 3243 }, { "epoch": 1.3662782535448548, "grad_norm": 0.4334721565246582, "learning_rate": 6.620873955069657e-06, "loss": 0.4379, "step": 3244 }, { "epoch": 1.3666994243998316, "grad_norm": 0.4114982783794403, "learning_rate": 6.618555194461893e-06, "loss": 0.3895, "step": 3245 }, { "epoch": 1.3671205952548084, "grad_norm": 0.47564372420310974, "learning_rate": 6.616236044946283e-06, "loss": 0.5211, "step": 3246 }, { "epoch": 1.3675417661097853, "grad_norm": 0.4045105278491974, "learning_rate": 6.613916507080072e-06, "loss": 0.4212, "step": 3247 }, { "epoch": 1.3679629369647621, "grad_norm": 0.4674696922302246, "learning_rate": 6.6115965814206e-06, "loss": 0.4198, "step": 3248 }, { "epoch": 1.3683841078197387, "grad_norm": 0.43923357129096985, "learning_rate": 6.609276268525301e-06, "loss": 0.4128, "step": 3249 }, { "epoch": 1.3688052786747158, "grad_norm": 0.5146051645278931, "learning_rate": 6.606955568951702e-06, "loss": 0.5438, "step": 3250 }, { "epoch": 1.3692264495296924, "grad_norm": 0.40973326563835144, "learning_rate": 6.604634483257423e-06, "loss": 0.4291, "step": 3251 }, { "epoch": 1.3696476203846695, "grad_norm": 0.48463696241378784, "learning_rate": 6.602313012000177e-06, "loss": 0.4702, "step": 3252 }, { "epoch": 1.370068791239646, "grad_norm": 0.49970361590385437, "learning_rate": 6.599991155737768e-06, "loss": 0.4099, "step": 3253 }, { "epoch": 1.3704899620946231, "grad_norm": 0.39536818861961365, "learning_rate": 6.597668915028094e-06, "loss": 0.4024, "step": 3254 }, { "epoch": 1.3709111329495998, "grad_norm": 0.44680094718933105, "learning_rate": 6.595346290429145e-06, "loss": 0.4404, "step": 3255 }, { "epoch": 1.3713323038045768, "grad_norm": 0.5005700588226318, "learning_rate": 6.593023282499003e-06, "loss": 0.4812, "step": 3256 }, { "epoch": 1.3717534746595534, "grad_norm": 0.4348304867744446, "learning_rate": 6.590699891795843e-06, "loss": 0.4643, "step": 3257 }, { "epoch": 1.3721746455145305, "grad_norm": 0.39776137471199036, "learning_rate": 6.588376118877933e-06, "loss": 0.3936, "step": 3258 }, { "epoch": 1.3725958163695071, "grad_norm": 0.5024625658988953, "learning_rate": 6.586051964303626e-06, "loss": 0.459, "step": 3259 }, { "epoch": 1.3730169872244842, "grad_norm": 0.4918850362300873, "learning_rate": 6.583727428631376e-06, "loss": 0.4308, "step": 3260 }, { "epoch": 1.3734381580794608, "grad_norm": 0.4414176642894745, "learning_rate": 6.581402512419724e-06, "loss": 0.4382, "step": 3261 }, { "epoch": 1.3738593289344379, "grad_norm": 0.4367092251777649, "learning_rate": 6.579077216227303e-06, "loss": 0.4066, "step": 3262 }, { "epoch": 1.3742804997894145, "grad_norm": 0.46894216537475586, "learning_rate": 6.576751540612835e-06, "loss": 0.5032, "step": 3263 }, { "epoch": 1.3747016706443915, "grad_norm": 0.41913914680480957, "learning_rate": 6.574425486135139e-06, "loss": 0.4175, "step": 3264 }, { "epoch": 1.3751228414993681, "grad_norm": 0.37517690658569336, "learning_rate": 6.57209905335312e-06, "loss": 0.4329, "step": 3265 }, { "epoch": 1.3755440123543452, "grad_norm": 0.40445786714553833, "learning_rate": 6.5697722428257725e-06, "loss": 0.4036, "step": 3266 }, { "epoch": 1.3759651832093218, "grad_norm": 0.4842759966850281, "learning_rate": 6.5674450551121875e-06, "loss": 0.465, "step": 3267 }, { "epoch": 1.3763863540642989, "grad_norm": 0.46725812554359436, "learning_rate": 6.5651174907715434e-06, "loss": 0.442, "step": 3268 }, { "epoch": 1.3768075249192755, "grad_norm": 0.48296743631362915, "learning_rate": 6.5627895503631074e-06, "loss": 0.4797, "step": 3269 }, { "epoch": 1.3772286957742526, "grad_norm": 0.441556453704834, "learning_rate": 6.560461234446242e-06, "loss": 0.4458, "step": 3270 }, { "epoch": 1.3776498666292292, "grad_norm": 0.4475367069244385, "learning_rate": 6.5581325435803945e-06, "loss": 0.424, "step": 3271 }, { "epoch": 1.3780710374842062, "grad_norm": 0.43113166093826294, "learning_rate": 6.555803478325107e-06, "loss": 0.404, "step": 3272 }, { "epoch": 1.3784922083391828, "grad_norm": 0.4797497093677521, "learning_rate": 6.553474039240007e-06, "loss": 0.484, "step": 3273 }, { "epoch": 1.3789133791941597, "grad_norm": 0.4547789990901947, "learning_rate": 6.551144226884816e-06, "loss": 0.4759, "step": 3274 }, { "epoch": 1.3793345500491365, "grad_norm": 0.45207545161247253, "learning_rate": 6.548814041819342e-06, "loss": 0.4108, "step": 3275 }, { "epoch": 1.3797557209041134, "grad_norm": 0.44495463371276855, "learning_rate": 6.546483484603485e-06, "loss": 0.4847, "step": 3276 }, { "epoch": 1.3801768917590902, "grad_norm": 0.3595247268676758, "learning_rate": 6.5441525557972315e-06, "loss": 0.3424, "step": 3277 }, { "epoch": 1.380598062614067, "grad_norm": 0.4501335024833679, "learning_rate": 6.541821255960663e-06, "loss": 0.4987, "step": 3278 }, { "epoch": 1.3810192334690439, "grad_norm": 0.4349310100078583, "learning_rate": 6.539489585653941e-06, "loss": 0.4062, "step": 3279 }, { "epoch": 1.3814404043240207, "grad_norm": 0.4953826665878296, "learning_rate": 6.537157545437326e-06, "loss": 0.5172, "step": 3280 }, { "epoch": 1.3818615751789975, "grad_norm": 0.40918436646461487, "learning_rate": 6.534825135871159e-06, "loss": 0.4084, "step": 3281 }, { "epoch": 1.3822827460339744, "grad_norm": 0.4120166599750519, "learning_rate": 6.532492357515875e-06, "loss": 0.3971, "step": 3282 }, { "epoch": 1.3827039168889512, "grad_norm": 0.43612655997276306, "learning_rate": 6.530159210931997e-06, "loss": 0.4605, "step": 3283 }, { "epoch": 1.383125087743928, "grad_norm": 0.44829684495925903, "learning_rate": 6.527825696680133e-06, "loss": 0.4294, "step": 3284 }, { "epoch": 1.383546258598905, "grad_norm": 0.46828481554985046, "learning_rate": 6.525491815320986e-06, "loss": 0.4454, "step": 3285 }, { "epoch": 1.3839674294538817, "grad_norm": 0.44655635952949524, "learning_rate": 6.523157567415337e-06, "loss": 0.4291, "step": 3286 }, { "epoch": 1.3843886003088586, "grad_norm": 0.4540977478027344, "learning_rate": 6.520822953524065e-06, "loss": 0.4513, "step": 3287 }, { "epoch": 1.3848097711638354, "grad_norm": 0.4191276729106903, "learning_rate": 6.518487974208134e-06, "loss": 0.4088, "step": 3288 }, { "epoch": 1.3852309420188123, "grad_norm": 0.3995714783668518, "learning_rate": 6.516152630028592e-06, "loss": 0.4357, "step": 3289 }, { "epoch": 1.385652112873789, "grad_norm": 0.41886278986930847, "learning_rate": 6.513816921546582e-06, "loss": 0.4812, "step": 3290 }, { "epoch": 1.386073283728766, "grad_norm": 0.42888712882995605, "learning_rate": 6.511480849323323e-06, "loss": 0.4422, "step": 3291 }, { "epoch": 1.3864944545837428, "grad_norm": 0.4257363975048065, "learning_rate": 6.5091444139201346e-06, "loss": 0.4188, "step": 3292 }, { "epoch": 1.3869156254387196, "grad_norm": 0.4249933362007141, "learning_rate": 6.506807615898414e-06, "loss": 0.4568, "step": 3293 }, { "epoch": 1.3873367962936964, "grad_norm": 0.38013648986816406, "learning_rate": 6.504470455819651e-06, "loss": 0.3882, "step": 3294 }, { "epoch": 1.3877579671486733, "grad_norm": 0.42218512296676636, "learning_rate": 6.5021329342454195e-06, "loss": 0.4453, "step": 3295 }, { "epoch": 1.3881791380036501, "grad_norm": 0.4417435824871063, "learning_rate": 6.499795051737383e-06, "loss": 0.5145, "step": 3296 }, { "epoch": 1.388600308858627, "grad_norm": 0.39301425218582153, "learning_rate": 6.497456808857286e-06, "loss": 0.3979, "step": 3297 }, { "epoch": 1.3890214797136038, "grad_norm": 0.39793944358825684, "learning_rate": 6.49511820616697e-06, "loss": 0.4408, "step": 3298 }, { "epoch": 1.3894426505685806, "grad_norm": 0.3766082525253296, "learning_rate": 6.492779244228348e-06, "loss": 0.4159, "step": 3299 }, { "epoch": 1.3898638214235575, "grad_norm": 0.5049134492874146, "learning_rate": 6.490439923603435e-06, "loss": 0.4495, "step": 3300 }, { "epoch": 1.3902849922785343, "grad_norm": 0.4069199860095978, "learning_rate": 6.488100244854322e-06, "loss": 0.4312, "step": 3301 }, { "epoch": 1.3907061631335111, "grad_norm": 0.40784183144569397, "learning_rate": 6.485760208543188e-06, "loss": 0.3986, "step": 3302 }, { "epoch": 1.391127333988488, "grad_norm": 0.44029805064201355, "learning_rate": 6.483419815232301e-06, "loss": 0.4519, "step": 3303 }, { "epoch": 1.3915485048434648, "grad_norm": 0.43678298592567444, "learning_rate": 6.481079065484013e-06, "loss": 0.4655, "step": 3304 }, { "epoch": 1.3919696756984417, "grad_norm": 0.4024921953678131, "learning_rate": 6.4787379598607605e-06, "loss": 0.3951, "step": 3305 }, { "epoch": 1.3923908465534185, "grad_norm": 0.4811380207538605, "learning_rate": 6.476396498925066e-06, "loss": 0.5133, "step": 3306 }, { "epoch": 1.3928120174083953, "grad_norm": 0.46156543493270874, "learning_rate": 6.474054683239537e-06, "loss": 0.4731, "step": 3307 }, { "epoch": 1.3932331882633722, "grad_norm": 0.46424001455307007, "learning_rate": 6.471712513366869e-06, "loss": 0.4149, "step": 3308 }, { "epoch": 1.393654359118349, "grad_norm": 0.4303872883319855, "learning_rate": 6.469369989869839e-06, "loss": 0.4388, "step": 3309 }, { "epoch": 1.3940755299733258, "grad_norm": 0.3728850483894348, "learning_rate": 6.467027113311311e-06, "loss": 0.3526, "step": 3310 }, { "epoch": 1.3944967008283027, "grad_norm": 0.46979260444641113, "learning_rate": 6.464683884254235e-06, "loss": 0.4516, "step": 3311 }, { "epoch": 1.3949178716832795, "grad_norm": 0.40525609254837036, "learning_rate": 6.462340303261643e-06, "loss": 0.4054, "step": 3312 }, { "epoch": 1.3953390425382564, "grad_norm": 0.4075617492198944, "learning_rate": 6.459996370896653e-06, "loss": 0.4548, "step": 3313 }, { "epoch": 1.3957602133932332, "grad_norm": 0.4434768855571747, "learning_rate": 6.4576520877224644e-06, "loss": 0.4691, "step": 3314 }, { "epoch": 1.39618138424821, "grad_norm": 0.4148203432559967, "learning_rate": 6.455307454302366e-06, "loss": 0.4235, "step": 3315 }, { "epoch": 1.3966025551031869, "grad_norm": 0.4354117512702942, "learning_rate": 6.452962471199729e-06, "loss": 0.472, "step": 3316 }, { "epoch": 1.3970237259581637, "grad_norm": 0.40328657627105713, "learning_rate": 6.4506171389780095e-06, "loss": 0.4343, "step": 3317 }, { "epoch": 1.3974448968131405, "grad_norm": 0.40146031975746155, "learning_rate": 6.4482714582007385e-06, "loss": 0.4294, "step": 3318 }, { "epoch": 1.3978660676681174, "grad_norm": 0.4115521013736725, "learning_rate": 6.4459254294315466e-06, "loss": 0.4364, "step": 3319 }, { "epoch": 1.3982872385230942, "grad_norm": 0.4261729121208191, "learning_rate": 6.443579053234137e-06, "loss": 0.3986, "step": 3320 }, { "epoch": 1.398708409378071, "grad_norm": 0.4762038290500641, "learning_rate": 6.441232330172297e-06, "loss": 0.4363, "step": 3321 }, { "epoch": 1.399129580233048, "grad_norm": 0.4513753056526184, "learning_rate": 6.4388852608099005e-06, "loss": 0.5025, "step": 3322 }, { "epoch": 1.3995507510880247, "grad_norm": 0.447808176279068, "learning_rate": 6.436537845710904e-06, "loss": 0.4539, "step": 3323 }, { "epoch": 1.3999719219430016, "grad_norm": 0.4210204780101776, "learning_rate": 6.4341900854393455e-06, "loss": 0.4782, "step": 3324 }, { "epoch": 1.4003930927979784, "grad_norm": 0.41832056641578674, "learning_rate": 6.431841980559348e-06, "loss": 0.4065, "step": 3325 }, { "epoch": 1.4008142636529552, "grad_norm": 0.4708201289176941, "learning_rate": 6.429493531635115e-06, "loss": 0.4571, "step": 3326 }, { "epoch": 1.401235434507932, "grad_norm": 0.4703104794025421, "learning_rate": 6.427144739230935e-06, "loss": 0.4271, "step": 3327 }, { "epoch": 1.401656605362909, "grad_norm": 0.4772248864173889, "learning_rate": 6.4247956039111755e-06, "loss": 0.49, "step": 3328 }, { "epoch": 1.4020777762178858, "grad_norm": 0.42949816584587097, "learning_rate": 6.422446126240292e-06, "loss": 0.3859, "step": 3329 }, { "epoch": 1.4024989470728626, "grad_norm": 0.4753066599369049, "learning_rate": 6.420096306782816e-06, "loss": 0.4333, "step": 3330 }, { "epoch": 1.4029201179278394, "grad_norm": 0.4602707624435425, "learning_rate": 6.4177461461033675e-06, "loss": 0.4748, "step": 3331 }, { "epoch": 1.4033412887828163, "grad_norm": 0.45823952555656433, "learning_rate": 6.415395644766642e-06, "loss": 0.4661, "step": 3332 }, { "epoch": 1.4037624596377931, "grad_norm": 0.4465954601764679, "learning_rate": 6.413044803337423e-06, "loss": 0.457, "step": 3333 }, { "epoch": 1.40418363049277, "grad_norm": 0.4483569860458374, "learning_rate": 6.41069362238057e-06, "loss": 0.4324, "step": 3334 }, { "epoch": 1.4046048013477468, "grad_norm": 0.46016398072242737, "learning_rate": 6.408342102461027e-06, "loss": 0.4594, "step": 3335 }, { "epoch": 1.4050259722027236, "grad_norm": 0.45089882612228394, "learning_rate": 6.40599024414382e-06, "loss": 0.4461, "step": 3336 }, { "epoch": 1.4054471430577005, "grad_norm": 0.3845515549182892, "learning_rate": 6.403638047994057e-06, "loss": 0.3995, "step": 3337 }, { "epoch": 1.4058683139126773, "grad_norm": 0.40404531359672546, "learning_rate": 6.401285514576924e-06, "loss": 0.432, "step": 3338 }, { "epoch": 1.4062894847676541, "grad_norm": 0.42251813411712646, "learning_rate": 6.398932644457689e-06, "loss": 0.4303, "step": 3339 }, { "epoch": 1.406710655622631, "grad_norm": 0.44028255343437195, "learning_rate": 6.396579438201706e-06, "loss": 0.4266, "step": 3340 }, { "epoch": 1.4071318264776078, "grad_norm": 0.3897727429866791, "learning_rate": 6.3942258963744e-06, "loss": 0.4164, "step": 3341 }, { "epoch": 1.4075529973325847, "grad_norm": 0.3924030661582947, "learning_rate": 6.391872019541283e-06, "loss": 0.3407, "step": 3342 }, { "epoch": 1.4079741681875615, "grad_norm": 0.4690360128879547, "learning_rate": 6.38951780826795e-06, "loss": 0.47, "step": 3343 }, { "epoch": 1.4083953390425383, "grad_norm": 0.4424431324005127, "learning_rate": 6.38716326312007e-06, "loss": 0.4703, "step": 3344 }, { "epoch": 1.4088165098975152, "grad_norm": 0.4338817596435547, "learning_rate": 6.384808384663396e-06, "loss": 0.4109, "step": 3345 }, { "epoch": 1.409237680752492, "grad_norm": 0.4662185311317444, "learning_rate": 6.3824531734637604e-06, "loss": 0.482, "step": 3346 }, { "epoch": 1.4096588516074688, "grad_norm": 0.4544404447078705, "learning_rate": 6.380097630087077e-06, "loss": 0.4451, "step": 3347 }, { "epoch": 1.4100800224624457, "grad_norm": 0.43527618050575256, "learning_rate": 6.377741755099334e-06, "loss": 0.4108, "step": 3348 }, { "epoch": 1.4105011933174225, "grad_norm": 0.4516642093658447, "learning_rate": 6.375385549066606e-06, "loss": 0.4485, "step": 3349 }, { "epoch": 1.4109223641723991, "grad_norm": 0.4597243666648865, "learning_rate": 6.373029012555043e-06, "loss": 0.4157, "step": 3350 }, { "epoch": 1.4113435350273762, "grad_norm": 0.4290796220302582, "learning_rate": 6.370672146130876e-06, "loss": 0.4716, "step": 3351 }, { "epoch": 1.4117647058823528, "grad_norm": 0.3901655375957489, "learning_rate": 6.368314950360416e-06, "loss": 0.4015, "step": 3352 }, { "epoch": 1.4121858767373299, "grad_norm": 0.4585454761981964, "learning_rate": 6.3659574258100495e-06, "loss": 0.4998, "step": 3353 }, { "epoch": 1.4126070475923065, "grad_norm": 0.4112253189086914, "learning_rate": 6.363599573046245e-06, "loss": 0.4685, "step": 3354 }, { "epoch": 1.4130282184472835, "grad_norm": 0.4468713402748108, "learning_rate": 6.361241392635553e-06, "loss": 0.4384, "step": 3355 }, { "epoch": 1.4134493893022602, "grad_norm": 0.4703979790210724, "learning_rate": 6.358882885144595e-06, "loss": 0.5127, "step": 3356 }, { "epoch": 1.4138705601572372, "grad_norm": 0.4044097363948822, "learning_rate": 6.356524051140078e-06, "loss": 0.4047, "step": 3357 }, { "epoch": 1.4142917310122138, "grad_norm": 0.4325048625469208, "learning_rate": 6.354164891188782e-06, "loss": 0.4757, "step": 3358 }, { "epoch": 1.414712901867191, "grad_norm": 0.4076899588108063, "learning_rate": 6.351805405857571e-06, "loss": 0.431, "step": 3359 }, { "epoch": 1.4151340727221675, "grad_norm": 0.45856037735939026, "learning_rate": 6.349445595713383e-06, "loss": 0.4518, "step": 3360 }, { "epoch": 1.4155552435771446, "grad_norm": 0.4642353653907776, "learning_rate": 6.347085461323233e-06, "loss": 0.481, "step": 3361 }, { "epoch": 1.4159764144321212, "grad_norm": 0.43146979808807373, "learning_rate": 6.34472500325422e-06, "loss": 0.4147, "step": 3362 }, { "epoch": 1.4163975852870982, "grad_norm": 0.4911706745624542, "learning_rate": 6.342364222073514e-06, "loss": 0.5092, "step": 3363 }, { "epoch": 1.4168187561420749, "grad_norm": 0.48695018887519836, "learning_rate": 6.340003118348366e-06, "loss": 0.4943, "step": 3364 }, { "epoch": 1.417239926997052, "grad_norm": 0.38040879368782043, "learning_rate": 6.337641692646106e-06, "loss": 0.3453, "step": 3365 }, { "epoch": 1.4176610978520285, "grad_norm": 0.48408830165863037, "learning_rate": 6.335279945534135e-06, "loss": 0.4941, "step": 3366 }, { "epoch": 1.4180822687070056, "grad_norm": 0.501594603061676, "learning_rate": 6.332917877579942e-06, "loss": 0.4246, "step": 3367 }, { "epoch": 1.4185034395619822, "grad_norm": 0.4126393496990204, "learning_rate": 6.330555489351082e-06, "loss": 0.396, "step": 3368 }, { "epoch": 1.4189246104169593, "grad_norm": 0.5610701441764832, "learning_rate": 6.3281927814151925e-06, "loss": 0.4897, "step": 3369 }, { "epoch": 1.419345781271936, "grad_norm": 0.4124129116535187, "learning_rate": 6.325829754339989e-06, "loss": 0.4207, "step": 3370 }, { "epoch": 1.419766952126913, "grad_norm": 0.5044605731964111, "learning_rate": 6.323466408693258e-06, "loss": 0.4368, "step": 3371 }, { "epoch": 1.4201881229818896, "grad_norm": 0.4191031754016876, "learning_rate": 6.321102745042872e-06, "loss": 0.4009, "step": 3372 }, { "epoch": 1.4206092938368666, "grad_norm": 0.4546490013599396, "learning_rate": 6.318738763956769e-06, "loss": 0.4706, "step": 3373 }, { "epoch": 1.4210304646918432, "grad_norm": 0.4050762951374054, "learning_rate": 6.316374466002969e-06, "loss": 0.4052, "step": 3374 }, { "epoch": 1.42145163554682, "grad_norm": 0.4868549406528473, "learning_rate": 6.314009851749569e-06, "loss": 0.5067, "step": 3375 }, { "epoch": 1.421872806401797, "grad_norm": 0.47092750668525696, "learning_rate": 6.311644921764742e-06, "loss": 0.4837, "step": 3376 }, { "epoch": 1.4222939772567738, "grad_norm": 0.45662614703178406, "learning_rate": 6.309279676616732e-06, "loss": 0.4416, "step": 3377 }, { "epoch": 1.4227151481117506, "grad_norm": 0.4358850121498108, "learning_rate": 6.306914116873863e-06, "loss": 0.4238, "step": 3378 }, { "epoch": 1.4231363189667274, "grad_norm": 0.41963115334510803, "learning_rate": 6.3045482431045376e-06, "loss": 0.407, "step": 3379 }, { "epoch": 1.4235574898217043, "grad_norm": 0.48282042145729065, "learning_rate": 6.302182055877225e-06, "loss": 0.4635, "step": 3380 }, { "epoch": 1.423978660676681, "grad_norm": 0.48878833651542664, "learning_rate": 6.299815555760478e-06, "loss": 0.5367, "step": 3381 }, { "epoch": 1.424399831531658, "grad_norm": 0.395994633436203, "learning_rate": 6.297448743322918e-06, "loss": 0.4295, "step": 3382 }, { "epoch": 1.4248210023866348, "grad_norm": 0.4016983211040497, "learning_rate": 6.295081619133247e-06, "loss": 0.42, "step": 3383 }, { "epoch": 1.4252421732416116, "grad_norm": 0.44985443353652954, "learning_rate": 6.292714183760242e-06, "loss": 0.4853, "step": 3384 }, { "epoch": 1.4256633440965885, "grad_norm": 0.4739561378955841, "learning_rate": 6.2903464377727485e-06, "loss": 0.4693, "step": 3385 }, { "epoch": 1.4260845149515653, "grad_norm": 0.4077162742614746, "learning_rate": 6.28797838173969e-06, "loss": 0.4142, "step": 3386 }, { "epoch": 1.4265056858065421, "grad_norm": 0.3909000754356384, "learning_rate": 6.285610016230069e-06, "loss": 0.4471, "step": 3387 }, { "epoch": 1.426926856661519, "grad_norm": 0.45432010293006897, "learning_rate": 6.283241341812957e-06, "loss": 0.4342, "step": 3388 }, { "epoch": 1.4273480275164958, "grad_norm": 0.46933960914611816, "learning_rate": 6.2808723590575e-06, "loss": 0.4237, "step": 3389 }, { "epoch": 1.4277691983714726, "grad_norm": 0.4277940094470978, "learning_rate": 6.278503068532917e-06, "loss": 0.4545, "step": 3390 }, { "epoch": 1.4281903692264495, "grad_norm": 0.40979793667793274, "learning_rate": 6.2761334708085095e-06, "loss": 0.4589, "step": 3391 }, { "epoch": 1.4286115400814263, "grad_norm": 0.43038445711135864, "learning_rate": 6.273763566453642e-06, "loss": 0.4436, "step": 3392 }, { "epoch": 1.4290327109364032, "grad_norm": 0.42212817072868347, "learning_rate": 6.271393356037758e-06, "loss": 0.4012, "step": 3393 }, { "epoch": 1.42945388179138, "grad_norm": 0.42116767168045044, "learning_rate": 6.269022840130374e-06, "loss": 0.3888, "step": 3394 }, { "epoch": 1.4298750526463568, "grad_norm": 0.40638214349746704, "learning_rate": 6.26665201930108e-06, "loss": 0.4142, "step": 3395 }, { "epoch": 1.4302962235013337, "grad_norm": 0.4231417179107666, "learning_rate": 6.2642808941195365e-06, "loss": 0.4519, "step": 3396 }, { "epoch": 1.4307173943563105, "grad_norm": 0.4425640106201172, "learning_rate": 6.261909465155482e-06, "loss": 0.4425, "step": 3397 }, { "epoch": 1.4311385652112874, "grad_norm": 0.47447049617767334, "learning_rate": 6.2595377329787254e-06, "loss": 0.457, "step": 3398 }, { "epoch": 1.4315597360662642, "grad_norm": 0.4240286350250244, "learning_rate": 6.257165698159149e-06, "loss": 0.4161, "step": 3399 }, { "epoch": 1.431980906921241, "grad_norm": 0.44401252269744873, "learning_rate": 6.254793361266706e-06, "loss": 0.4274, "step": 3400 }, { "epoch": 1.4324020777762179, "grad_norm": 0.46364349126815796, "learning_rate": 6.252420722871424e-06, "loss": 0.513, "step": 3401 }, { "epoch": 1.4328232486311947, "grad_norm": 0.5088035464286804, "learning_rate": 6.250047783543402e-06, "loss": 0.437, "step": 3402 }, { "epoch": 1.4332444194861715, "grad_norm": 0.4253384470939636, "learning_rate": 6.247674543852812e-06, "loss": 0.4466, "step": 3403 }, { "epoch": 1.4336655903411484, "grad_norm": 0.4068525731563568, "learning_rate": 6.2453010043699e-06, "loss": 0.4372, "step": 3404 }, { "epoch": 1.4340867611961252, "grad_norm": 0.4526945948600769, "learning_rate": 6.24292716566498e-06, "loss": 0.5024, "step": 3405 }, { "epoch": 1.434507932051102, "grad_norm": 0.4268932342529297, "learning_rate": 6.2405530283084406e-06, "loss": 0.3949, "step": 3406 }, { "epoch": 1.434929102906079, "grad_norm": 0.4918173551559448, "learning_rate": 6.238178592870744e-06, "loss": 0.4884, "step": 3407 }, { "epoch": 1.4353502737610557, "grad_norm": 0.3964781165122986, "learning_rate": 6.2358038599224165e-06, "loss": 0.4475, "step": 3408 }, { "epoch": 1.4357714446160326, "grad_norm": 0.406276673078537, "learning_rate": 6.233428830034066e-06, "loss": 0.4301, "step": 3409 }, { "epoch": 1.4361926154710094, "grad_norm": 0.4833310842514038, "learning_rate": 6.231053503776363e-06, "loss": 0.4106, "step": 3410 }, { "epoch": 1.4366137863259862, "grad_norm": 0.449858158826828, "learning_rate": 6.228677881720056e-06, "loss": 0.4503, "step": 3411 }, { "epoch": 1.437034957180963, "grad_norm": 0.4730953574180603, "learning_rate": 6.22630196443596e-06, "loss": 0.4739, "step": 3412 }, { "epoch": 1.43745612803594, "grad_norm": 0.43938517570495605, "learning_rate": 6.223925752494961e-06, "loss": 0.4306, "step": 3413 }, { "epoch": 1.4378772988909168, "grad_norm": 0.4686345160007477, "learning_rate": 6.221549246468022e-06, "loss": 0.4451, "step": 3414 }, { "epoch": 1.4382984697458936, "grad_norm": 0.43199965357780457, "learning_rate": 6.219172446926166e-06, "loss": 0.4404, "step": 3415 }, { "epoch": 1.4387196406008704, "grad_norm": 0.4266003370285034, "learning_rate": 6.2167953544404955e-06, "loss": 0.4182, "step": 3416 }, { "epoch": 1.4391408114558473, "grad_norm": 0.42705532908439636, "learning_rate": 6.2144179695821814e-06, "loss": 0.4065, "step": 3417 }, { "epoch": 1.439561982310824, "grad_norm": 0.5727313160896301, "learning_rate": 6.212040292922462e-06, "loss": 0.497, "step": 3418 }, { "epoch": 1.439983153165801, "grad_norm": 0.4563077390193939, "learning_rate": 6.2096623250326485e-06, "loss": 0.4538, "step": 3419 }, { "epoch": 1.4404043240207778, "grad_norm": 0.4947335422039032, "learning_rate": 6.20728406648412e-06, "loss": 0.4766, "step": 3420 }, { "epoch": 1.4408254948757546, "grad_norm": 0.4471968412399292, "learning_rate": 6.204905517848327e-06, "loss": 0.4624, "step": 3421 }, { "epoch": 1.4412466657307315, "grad_norm": 0.47324174642562866, "learning_rate": 6.20252667969679e-06, "loss": 0.5011, "step": 3422 }, { "epoch": 1.4416678365857083, "grad_norm": 0.4522571563720703, "learning_rate": 6.200147552601097e-06, "loss": 0.4246, "step": 3423 }, { "epoch": 1.4420890074406851, "grad_norm": 0.49731647968292236, "learning_rate": 6.197768137132909e-06, "loss": 0.4707, "step": 3424 }, { "epoch": 1.442510178295662, "grad_norm": 0.4390016198158264, "learning_rate": 6.195388433863951e-06, "loss": 0.4489, "step": 3425 }, { "epoch": 1.4429313491506388, "grad_norm": 0.43620628118515015, "learning_rate": 6.193008443366022e-06, "loss": 0.4102, "step": 3426 }, { "epoch": 1.4433525200056156, "grad_norm": 0.46619582176208496, "learning_rate": 6.190628166210987e-06, "loss": 0.4936, "step": 3427 }, { "epoch": 1.4437736908605925, "grad_norm": 0.4958183765411377, "learning_rate": 6.188247602970783e-06, "loss": 0.4336, "step": 3428 }, { "epoch": 1.4441948617155693, "grad_norm": 0.41677260398864746, "learning_rate": 6.185866754217411e-06, "loss": 0.4189, "step": 3429 }, { "epoch": 1.4446160325705462, "grad_norm": 0.45021170377731323, "learning_rate": 6.183485620522946e-06, "loss": 0.4705, "step": 3430 }, { "epoch": 1.445037203425523, "grad_norm": 0.46519631147384644, "learning_rate": 6.1811042024595296e-06, "loss": 0.4636, "step": 3431 }, { "epoch": 1.4454583742804998, "grad_norm": 0.4843187630176544, "learning_rate": 6.178722500599368e-06, "loss": 0.4503, "step": 3432 }, { "epoch": 1.4458795451354767, "grad_norm": 0.47655925154685974, "learning_rate": 6.176340515514738e-06, "loss": 0.4757, "step": 3433 }, { "epoch": 1.4463007159904535, "grad_norm": 0.38512125611305237, "learning_rate": 6.173958247777989e-06, "loss": 0.4154, "step": 3434 }, { "epoch": 1.4467218868454303, "grad_norm": 0.4622701108455658, "learning_rate": 6.171575697961533e-06, "loss": 0.4979, "step": 3435 }, { "epoch": 1.4471430577004072, "grad_norm": 0.43479543924331665, "learning_rate": 6.169192866637848e-06, "loss": 0.437, "step": 3436 }, { "epoch": 1.447564228555384, "grad_norm": 0.4823412001132965, "learning_rate": 6.166809754379487e-06, "loss": 0.4818, "step": 3437 }, { "epoch": 1.4479853994103609, "grad_norm": 0.4610435366630554, "learning_rate": 6.1644263617590635e-06, "loss": 0.4689, "step": 3438 }, { "epoch": 1.4484065702653377, "grad_norm": 0.5146170854568481, "learning_rate": 6.1620426893492645e-06, "loss": 0.5234, "step": 3439 }, { "epoch": 1.4488277411203145, "grad_norm": 0.41510874032974243, "learning_rate": 6.159658737722837e-06, "loss": 0.4436, "step": 3440 }, { "epoch": 1.4492489119752914, "grad_norm": 0.44266238808631897, "learning_rate": 6.1572745074526005e-06, "loss": 0.5119, "step": 3441 }, { "epoch": 1.4496700828302682, "grad_norm": 0.4412792921066284, "learning_rate": 6.15488999911144e-06, "loss": 0.3925, "step": 3442 }, { "epoch": 1.450091253685245, "grad_norm": 0.4878600835800171, "learning_rate": 6.152505213272308e-06, "loss": 0.4695, "step": 3443 }, { "epoch": 1.450512424540222, "grad_norm": 0.36594340205192566, "learning_rate": 6.150120150508221e-06, "loss": 0.3762, "step": 3444 }, { "epoch": 1.4509335953951987, "grad_norm": 0.39831066131591797, "learning_rate": 6.147734811392264e-06, "loss": 0.4642, "step": 3445 }, { "epoch": 1.4513547662501756, "grad_norm": 0.4335394501686096, "learning_rate": 6.145349196497592e-06, "loss": 0.4352, "step": 3446 }, { "epoch": 1.4517759371051524, "grad_norm": 0.4280918538570404, "learning_rate": 6.1429633063974185e-06, "loss": 0.4397, "step": 3447 }, { "epoch": 1.4521971079601292, "grad_norm": 0.4265702962875366, "learning_rate": 6.140577141665029e-06, "loss": 0.4392, "step": 3448 }, { "epoch": 1.4526182788151059, "grad_norm": 0.4158959984779358, "learning_rate": 6.138190702873773e-06, "loss": 0.415, "step": 3449 }, { "epoch": 1.453039449670083, "grad_norm": 0.4822803735733032, "learning_rate": 6.135803990597066e-06, "loss": 0.4868, "step": 3450 }, { "epoch": 1.4534606205250595, "grad_norm": 0.44347572326660156, "learning_rate": 6.133417005408391e-06, "loss": 0.4473, "step": 3451 }, { "epoch": 1.4538817913800366, "grad_norm": 0.41292595863342285, "learning_rate": 6.131029747881291e-06, "loss": 0.4108, "step": 3452 }, { "epoch": 1.4543029622350132, "grad_norm": 0.4797835052013397, "learning_rate": 6.1286422185893805e-06, "loss": 0.4629, "step": 3453 }, { "epoch": 1.4547241330899903, "grad_norm": 0.43516477942466736, "learning_rate": 6.126254418106339e-06, "loss": 0.4469, "step": 3454 }, { "epoch": 1.4551453039449669, "grad_norm": 0.44279050827026367, "learning_rate": 6.1238663470059054e-06, "loss": 0.4188, "step": 3455 }, { "epoch": 1.455566474799944, "grad_norm": 0.42997756600379944, "learning_rate": 6.121478005861892e-06, "loss": 0.4052, "step": 3456 }, { "epoch": 1.4559876456549206, "grad_norm": 0.48837074637413025, "learning_rate": 6.1190893952481664e-06, "loss": 0.4947, "step": 3457 }, { "epoch": 1.4564088165098976, "grad_norm": 0.45457062125205994, "learning_rate": 6.116700515738671e-06, "loss": 0.4578, "step": 3458 }, { "epoch": 1.4568299873648742, "grad_norm": 0.4113196134567261, "learning_rate": 6.114311367907405e-06, "loss": 0.4378, "step": 3459 }, { "epoch": 1.4572511582198513, "grad_norm": 0.40174752473831177, "learning_rate": 6.111921952328434e-06, "loss": 0.4402, "step": 3460 }, { "epoch": 1.457672329074828, "grad_norm": 0.4068834185600281, "learning_rate": 6.1095322695758915e-06, "loss": 0.4287, "step": 3461 }, { "epoch": 1.458093499929805, "grad_norm": 0.48058533668518066, "learning_rate": 6.107142320223971e-06, "loss": 0.469, "step": 3462 }, { "epoch": 1.4585146707847816, "grad_norm": 0.49052971601486206, "learning_rate": 6.104752104846932e-06, "loss": 0.4225, "step": 3463 }, { "epoch": 1.4589358416397586, "grad_norm": 0.44582587480545044, "learning_rate": 6.102361624019097e-06, "loss": 0.4398, "step": 3464 }, { "epoch": 1.4593570124947353, "grad_norm": 0.4990631639957428, "learning_rate": 6.0999708783148545e-06, "loss": 0.5347, "step": 3465 }, { "epoch": 1.4597781833497123, "grad_norm": 0.43735232949256897, "learning_rate": 6.097579868308654e-06, "loss": 0.4332, "step": 3466 }, { "epoch": 1.460199354204689, "grad_norm": 0.418622761964798, "learning_rate": 6.095188594575008e-06, "loss": 0.4029, "step": 3467 }, { "epoch": 1.460620525059666, "grad_norm": 0.40481817722320557, "learning_rate": 6.092797057688496e-06, "loss": 0.3971, "step": 3468 }, { "epoch": 1.4610416959146426, "grad_norm": 0.44201770424842834, "learning_rate": 6.090405258223756e-06, "loss": 0.4663, "step": 3469 }, { "epoch": 1.4614628667696197, "grad_norm": 0.43935826420783997, "learning_rate": 6.088013196755494e-06, "loss": 0.4404, "step": 3470 }, { "epoch": 1.4618840376245963, "grad_norm": 0.4162468910217285, "learning_rate": 6.085620873858477e-06, "loss": 0.3733, "step": 3471 }, { "epoch": 1.4623052084795733, "grad_norm": 0.4858304560184479, "learning_rate": 6.083228290107531e-06, "loss": 0.5168, "step": 3472 }, { "epoch": 1.46272637933455, "grad_norm": 0.40752148628234863, "learning_rate": 6.080835446077553e-06, "loss": 0.4069, "step": 3473 }, { "epoch": 1.4631475501895268, "grad_norm": 0.44166871905326843, "learning_rate": 6.078442342343494e-06, "loss": 0.4259, "step": 3474 }, { "epoch": 1.4635687210445036, "grad_norm": 0.4726661443710327, "learning_rate": 6.076048979480371e-06, "loss": 0.4615, "step": 3475 }, { "epoch": 1.4639898918994805, "grad_norm": 0.5003059506416321, "learning_rate": 6.073655358063265e-06, "loss": 0.4284, "step": 3476 }, { "epoch": 1.4644110627544573, "grad_norm": 0.42591750621795654, "learning_rate": 6.071261478667316e-06, "loss": 0.4212, "step": 3477 }, { "epoch": 1.4648322336094342, "grad_norm": 0.49020758271217346, "learning_rate": 6.0688673418677315e-06, "loss": 0.4724, "step": 3478 }, { "epoch": 1.465253404464411, "grad_norm": 0.46178004145622253, "learning_rate": 6.06647294823977e-06, "loss": 0.4621, "step": 3479 }, { "epoch": 1.4656745753193878, "grad_norm": 0.4503214657306671, "learning_rate": 6.0640782983587624e-06, "loss": 0.4297, "step": 3480 }, { "epoch": 1.4660957461743647, "grad_norm": 0.425968736410141, "learning_rate": 6.061683392800099e-06, "loss": 0.45, "step": 3481 }, { "epoch": 1.4665169170293415, "grad_norm": 0.3764156699180603, "learning_rate": 6.059288232139225e-06, "loss": 0.3943, "step": 3482 }, { "epoch": 1.4669380878843183, "grad_norm": 0.51284259557724, "learning_rate": 6.056892816951657e-06, "loss": 0.5011, "step": 3483 }, { "epoch": 1.4673592587392952, "grad_norm": 0.4210369288921356, "learning_rate": 6.054497147812962e-06, "loss": 0.4043, "step": 3484 }, { "epoch": 1.467780429594272, "grad_norm": 0.4554818570613861, "learning_rate": 6.052101225298778e-06, "loss": 0.4757, "step": 3485 }, { "epoch": 1.4682016004492489, "grad_norm": 0.40140578150749207, "learning_rate": 6.049705049984798e-06, "loss": 0.4172, "step": 3486 }, { "epoch": 1.4686227713042257, "grad_norm": 0.46240925788879395, "learning_rate": 6.047308622446775e-06, "loss": 0.4298, "step": 3487 }, { "epoch": 1.4690439421592025, "grad_norm": 0.4583427906036377, "learning_rate": 6.044911943260527e-06, "loss": 0.4697, "step": 3488 }, { "epoch": 1.4694651130141794, "grad_norm": 0.3966991901397705, "learning_rate": 6.04251501300193e-06, "loss": 0.4305, "step": 3489 }, { "epoch": 1.4698862838691562, "grad_norm": 0.40792906284332275, "learning_rate": 6.040117832246919e-06, "loss": 0.4248, "step": 3490 }, { "epoch": 1.470307454724133, "grad_norm": 0.457780659198761, "learning_rate": 6.037720401571493e-06, "loss": 0.4926, "step": 3491 }, { "epoch": 1.4707286255791099, "grad_norm": 0.446365088224411, "learning_rate": 6.035322721551705e-06, "loss": 0.4632, "step": 3492 }, { "epoch": 1.4711497964340867, "grad_norm": 0.39636948704719543, "learning_rate": 6.032924792763677e-06, "loss": 0.4465, "step": 3493 }, { "epoch": 1.4715709672890636, "grad_norm": 0.4120583236217499, "learning_rate": 6.030526615783581e-06, "loss": 0.482, "step": 3494 }, { "epoch": 1.4719921381440404, "grad_norm": 0.3949814438819885, "learning_rate": 6.0281281911876545e-06, "loss": 0.398, "step": 3495 }, { "epoch": 1.4724133089990172, "grad_norm": 0.4642089903354645, "learning_rate": 6.0257295195521924e-06, "loss": 0.4853, "step": 3496 }, { "epoch": 1.472834479853994, "grad_norm": 0.4117697775363922, "learning_rate": 6.0233306014535505e-06, "loss": 0.4239, "step": 3497 }, { "epoch": 1.473255650708971, "grad_norm": 0.40880343317985535, "learning_rate": 6.020931437468144e-06, "loss": 0.4532, "step": 3498 }, { "epoch": 1.4736768215639477, "grad_norm": 0.4252103567123413, "learning_rate": 6.018532028172444e-06, "loss": 0.4129, "step": 3499 }, { "epoch": 1.4740979924189246, "grad_norm": 0.4216277599334717, "learning_rate": 6.0161323741429815e-06, "loss": 0.4052, "step": 3500 }, { "epoch": 1.4745191632739014, "grad_norm": 0.4186052680015564, "learning_rate": 6.013732475956352e-06, "loss": 0.4361, "step": 3501 }, { "epoch": 1.4749403341288783, "grad_norm": 0.40781718492507935, "learning_rate": 6.0113323341892e-06, "loss": 0.418, "step": 3502 }, { "epoch": 1.475361504983855, "grad_norm": 0.4311645030975342, "learning_rate": 6.008931949418239e-06, "loss": 0.4435, "step": 3503 }, { "epoch": 1.475782675838832, "grad_norm": 0.41226568818092346, "learning_rate": 6.00653132222023e-06, "loss": 0.4054, "step": 3504 }, { "epoch": 1.4762038466938088, "grad_norm": 0.4652479588985443, "learning_rate": 6.004130453172002e-06, "loss": 0.5169, "step": 3505 }, { "epoch": 1.4766250175487856, "grad_norm": 0.4202100336551666, "learning_rate": 6.001729342850438e-06, "loss": 0.4165, "step": 3506 }, { "epoch": 1.4770461884037625, "grad_norm": 0.43530207872390747, "learning_rate": 5.999327991832474e-06, "loss": 0.4779, "step": 3507 }, { "epoch": 1.4774673592587393, "grad_norm": 0.4328177571296692, "learning_rate": 5.9969264006951135e-06, "loss": 0.4528, "step": 3508 }, { "epoch": 1.4778885301137161, "grad_norm": 0.471713662147522, "learning_rate": 5.994524570015411e-06, "loss": 0.4726, "step": 3509 }, { "epoch": 1.478309700968693, "grad_norm": 0.4297441244125366, "learning_rate": 5.992122500370481e-06, "loss": 0.4278, "step": 3510 }, { "epoch": 1.4787308718236698, "grad_norm": 0.41973525285720825, "learning_rate": 5.989720192337495e-06, "loss": 0.4334, "step": 3511 }, { "epoch": 1.4791520426786466, "grad_norm": 0.40829765796661377, "learning_rate": 5.98731764649368e-06, "loss": 0.4488, "step": 3512 }, { "epoch": 1.4795732135336235, "grad_norm": 0.388497531414032, "learning_rate": 5.984914863416325e-06, "loss": 0.4008, "step": 3513 }, { "epoch": 1.4799943843886003, "grad_norm": 0.5523315668106079, "learning_rate": 5.98251184368277e-06, "loss": 0.5771, "step": 3514 }, { "epoch": 1.4804155552435772, "grad_norm": 0.42603692412376404, "learning_rate": 5.980108587870415e-06, "loss": 0.3974, "step": 3515 }, { "epoch": 1.480836726098554, "grad_norm": 0.44884446263313293, "learning_rate": 5.977705096556718e-06, "loss": 0.4835, "step": 3516 }, { "epoch": 1.4812578969535308, "grad_norm": 0.3882291615009308, "learning_rate": 5.97530137031919e-06, "loss": 0.4113, "step": 3517 }, { "epoch": 1.4816790678085077, "grad_norm": 0.41352033615112305, "learning_rate": 5.972897409735403e-06, "loss": 0.4572, "step": 3518 }, { "epoch": 1.4821002386634845, "grad_norm": 0.40910881757736206, "learning_rate": 5.9704932153829795e-06, "loss": 0.432, "step": 3519 }, { "epoch": 1.4825214095184613, "grad_norm": 0.4740121066570282, "learning_rate": 5.968088787839603e-06, "loss": 0.4874, "step": 3520 }, { "epoch": 1.4829425803734382, "grad_norm": 0.4141025245189667, "learning_rate": 5.9656841276830135e-06, "loss": 0.4112, "step": 3521 }, { "epoch": 1.483363751228415, "grad_norm": 0.45610588788986206, "learning_rate": 5.963279235491002e-06, "loss": 0.4318, "step": 3522 }, { "epoch": 1.4837849220833919, "grad_norm": 0.43570682406425476, "learning_rate": 5.9608741118414186e-06, "loss": 0.4532, "step": 3523 }, { "epoch": 1.4842060929383687, "grad_norm": 0.44732365012168884, "learning_rate": 5.958468757312168e-06, "loss": 0.4257, "step": 3524 }, { "epoch": 1.4846272637933455, "grad_norm": 0.47034287452697754, "learning_rate": 5.956063172481215e-06, "loss": 0.4485, "step": 3525 }, { "epoch": 1.4850484346483224, "grad_norm": 0.498238742351532, "learning_rate": 5.953657357926569e-06, "loss": 0.4592, "step": 3526 }, { "epoch": 1.4854696055032992, "grad_norm": 0.5016692280769348, "learning_rate": 5.9512513142263055e-06, "loss": 0.5542, "step": 3527 }, { "epoch": 1.485890776358276, "grad_norm": 0.44026145339012146, "learning_rate": 5.948845041958549e-06, "loss": 0.4808, "step": 3528 }, { "epoch": 1.4863119472132529, "grad_norm": 0.42685043811798096, "learning_rate": 5.9464385417014825e-06, "loss": 0.4095, "step": 3529 }, { "epoch": 1.4867331180682297, "grad_norm": 0.4464593231678009, "learning_rate": 5.944031814033342e-06, "loss": 0.4182, "step": 3530 }, { "epoch": 1.4871542889232066, "grad_norm": 0.44800832867622375, "learning_rate": 5.941624859532414e-06, "loss": 0.458, "step": 3531 }, { "epoch": 1.4875754597781834, "grad_norm": 0.4267199635505676, "learning_rate": 5.939217678777048e-06, "loss": 0.4101, "step": 3532 }, { "epoch": 1.4879966306331602, "grad_norm": 0.4096605181694031, "learning_rate": 5.936810272345644e-06, "loss": 0.4448, "step": 3533 }, { "epoch": 1.488417801488137, "grad_norm": 0.44703254103660583, "learning_rate": 5.934402640816652e-06, "loss": 0.4496, "step": 3534 }, { "epoch": 1.488838972343114, "grad_norm": 0.43451958894729614, "learning_rate": 5.931994784768582e-06, "loss": 0.419, "step": 3535 }, { "epoch": 1.4892601431980907, "grad_norm": 0.4479016363620758, "learning_rate": 5.929586704779996e-06, "loss": 0.4372, "step": 3536 }, { "epoch": 1.4896813140530676, "grad_norm": 0.4165150225162506, "learning_rate": 5.927178401429509e-06, "loss": 0.4468, "step": 3537 }, { "epoch": 1.4901024849080444, "grad_norm": 0.4273483157157898, "learning_rate": 5.92476987529579e-06, "loss": 0.4639, "step": 3538 }, { "epoch": 1.4905236557630213, "grad_norm": 0.3978915810585022, "learning_rate": 5.922361126957562e-06, "loss": 0.4355, "step": 3539 }, { "epoch": 1.490944826617998, "grad_norm": 0.4431084394454956, "learning_rate": 5.9199521569936016e-06, "loss": 0.4314, "step": 3540 }, { "epoch": 1.491365997472975, "grad_norm": 0.4265385568141937, "learning_rate": 5.9175429659827365e-06, "loss": 0.4312, "step": 3541 }, { "epoch": 1.4917871683279518, "grad_norm": 0.4054577052593231, "learning_rate": 5.91513355450385e-06, "loss": 0.4464, "step": 3542 }, { "epoch": 1.4922083391829286, "grad_norm": 0.45534318685531616, "learning_rate": 5.912723923135879e-06, "loss": 0.4781, "step": 3543 }, { "epoch": 1.4926295100379054, "grad_norm": 0.4133487343788147, "learning_rate": 5.910314072457811e-06, "loss": 0.3557, "step": 3544 }, { "epoch": 1.4930506808928823, "grad_norm": 0.45382651686668396, "learning_rate": 5.9079040030486875e-06, "loss": 0.4515, "step": 3545 }, { "epoch": 1.4934718517478591, "grad_norm": 0.5002033710479736, "learning_rate": 5.9054937154876e-06, "loss": 0.4717, "step": 3546 }, { "epoch": 1.493893022602836, "grad_norm": 0.40300488471984863, "learning_rate": 5.9030832103536965e-06, "loss": 0.4013, "step": 3547 }, { "epoch": 1.4943141934578128, "grad_norm": 0.46080100536346436, "learning_rate": 5.900672488226174e-06, "loss": 0.45, "step": 3548 }, { "epoch": 1.4947353643127896, "grad_norm": 0.46328574419021606, "learning_rate": 5.898261549684284e-06, "loss": 0.4156, "step": 3549 }, { "epoch": 1.4951565351677663, "grad_norm": 0.461001455783844, "learning_rate": 5.89585039530733e-06, "loss": 0.4597, "step": 3550 }, { "epoch": 1.4955777060227433, "grad_norm": 0.39532577991485596, "learning_rate": 5.893439025674661e-06, "loss": 0.4555, "step": 3551 }, { "epoch": 1.49599887687772, "grad_norm": 0.4332932233810425, "learning_rate": 5.891027441365689e-06, "loss": 0.4634, "step": 3552 }, { "epoch": 1.496420047732697, "grad_norm": 0.4355771541595459, "learning_rate": 5.888615642959871e-06, "loss": 0.4341, "step": 3553 }, { "epoch": 1.4968412185876736, "grad_norm": 0.4403177797794342, "learning_rate": 5.886203631036712e-06, "loss": 0.4556, "step": 3554 }, { "epoch": 1.4972623894426507, "grad_norm": 0.4419057369232178, "learning_rate": 5.883791406175775e-06, "loss": 0.4548, "step": 3555 }, { "epoch": 1.4976835602976273, "grad_norm": 0.41842710971832275, "learning_rate": 5.881378968956671e-06, "loss": 0.4024, "step": 3556 }, { "epoch": 1.4981047311526043, "grad_norm": 0.40654122829437256, "learning_rate": 5.878966319959063e-06, "loss": 0.4046, "step": 3557 }, { "epoch": 1.498525902007581, "grad_norm": 0.4785381853580475, "learning_rate": 5.876553459762665e-06, "loss": 0.4718, "step": 3558 }, { "epoch": 1.498947072862558, "grad_norm": 0.4188203513622284, "learning_rate": 5.874140388947237e-06, "loss": 0.4203, "step": 3559 }, { "epoch": 1.4993682437175346, "grad_norm": 0.37511906027793884, "learning_rate": 5.871727108092601e-06, "loss": 0.3993, "step": 3560 }, { "epoch": 1.4997894145725117, "grad_norm": 0.46043938398361206, "learning_rate": 5.869313617778616e-06, "loss": 0.4688, "step": 3561 }, { "epoch": 1.5002105854274883, "grad_norm": 0.41568344831466675, "learning_rate": 5.866899918585199e-06, "loss": 0.4225, "step": 3562 }, { "epoch": 1.5006317562824654, "grad_norm": 0.4490108788013458, "learning_rate": 5.864486011092318e-06, "loss": 0.4417, "step": 3563 }, { "epoch": 1.501052927137442, "grad_norm": 0.5017501711845398, "learning_rate": 5.862071895879986e-06, "loss": 0.458, "step": 3564 }, { "epoch": 1.501474097992419, "grad_norm": 0.4308650493621826, "learning_rate": 5.859657573528271e-06, "loss": 0.4402, "step": 3565 }, { "epoch": 1.5018952688473957, "grad_norm": 0.4276522696018219, "learning_rate": 5.8572430446172866e-06, "loss": 0.4348, "step": 3566 }, { "epoch": 1.5023164397023727, "grad_norm": 0.45822057127952576, "learning_rate": 5.8548283097271985e-06, "loss": 0.4487, "step": 3567 }, { "epoch": 1.5027376105573493, "grad_norm": 0.5047731399536133, "learning_rate": 5.852413369438222e-06, "loss": 0.4846, "step": 3568 }, { "epoch": 1.5031587814123264, "grad_norm": 0.41245728731155396, "learning_rate": 5.849998224330621e-06, "loss": 0.4609, "step": 3569 }, { "epoch": 1.503579952267303, "grad_norm": 0.41556406021118164, "learning_rate": 5.847582874984709e-06, "loss": 0.4225, "step": 3570 }, { "epoch": 1.50400112312228, "grad_norm": 0.43815669417381287, "learning_rate": 5.8451673219808446e-06, "loss": 0.4409, "step": 3571 }, { "epoch": 1.5044222939772567, "grad_norm": 0.46866270899772644, "learning_rate": 5.842751565899443e-06, "loss": 0.4038, "step": 3572 }, { "epoch": 1.5048434648322337, "grad_norm": 0.43149781227111816, "learning_rate": 5.8403356073209636e-06, "loss": 0.4502, "step": 3573 }, { "epoch": 1.5052646356872104, "grad_norm": 0.44401460886001587, "learning_rate": 5.837919446825915e-06, "loss": 0.4378, "step": 3574 }, { "epoch": 1.5056858065421874, "grad_norm": 0.4154607951641083, "learning_rate": 5.835503084994852e-06, "loss": 0.4099, "step": 3575 }, { "epoch": 1.506106977397164, "grad_norm": 0.4368525743484497, "learning_rate": 5.8330865224083835e-06, "loss": 0.419, "step": 3576 }, { "epoch": 1.506528148252141, "grad_norm": 0.4576496481895447, "learning_rate": 5.83066975964716e-06, "loss": 0.4683, "step": 3577 }, { "epoch": 1.5069493191071177, "grad_norm": 0.45562857389450073, "learning_rate": 5.828252797291887e-06, "loss": 0.4217, "step": 3578 }, { "epoch": 1.5073704899620948, "grad_norm": 0.43980658054351807, "learning_rate": 5.825835635923311e-06, "loss": 0.4156, "step": 3579 }, { "epoch": 1.5077916608170714, "grad_norm": 0.4756838381290436, "learning_rate": 5.823418276122231e-06, "loss": 0.4774, "step": 3580 }, { "epoch": 1.5082128316720484, "grad_norm": 0.4045068621635437, "learning_rate": 5.821000718469493e-06, "loss": 0.4612, "step": 3581 }, { "epoch": 1.508634002527025, "grad_norm": 0.449955016374588, "learning_rate": 5.818582963545988e-06, "loss": 0.4097, "step": 3582 }, { "epoch": 1.5090551733820021, "grad_norm": 0.5101761221885681, "learning_rate": 5.8161650119326575e-06, "loss": 0.5011, "step": 3583 }, { "epoch": 1.5094763442369787, "grad_norm": 0.4145153760910034, "learning_rate": 5.813746864210489e-06, "loss": 0.4066, "step": 3584 }, { "epoch": 1.5098975150919558, "grad_norm": 0.39419761300086975, "learning_rate": 5.811328520960518e-06, "loss": 0.3979, "step": 3585 }, { "epoch": 1.5103186859469324, "grad_norm": 0.44477760791778564, "learning_rate": 5.808909982763825e-06, "loss": 0.5046, "step": 3586 }, { "epoch": 1.5107398568019093, "grad_norm": 0.35236844420433044, "learning_rate": 5.806491250201538e-06, "loss": 0.3698, "step": 3587 }, { "epoch": 1.511161027656886, "grad_norm": 0.4566875994205475, "learning_rate": 5.804072323854832e-06, "loss": 0.4523, "step": 3588 }, { "epoch": 1.511582198511863, "grad_norm": 0.42777132987976074, "learning_rate": 5.80165320430493e-06, "loss": 0.4415, "step": 3589 }, { "epoch": 1.5120033693668398, "grad_norm": 0.3902428150177002, "learning_rate": 5.7992338921331e-06, "loss": 0.3923, "step": 3590 }, { "epoch": 1.5124245402218166, "grad_norm": 0.4745739996433258, "learning_rate": 5.796814387920656e-06, "loss": 0.4997, "step": 3591 }, { "epoch": 1.5128457110767934, "grad_norm": 0.4315702021121979, "learning_rate": 5.794394692248959e-06, "loss": 0.4454, "step": 3592 }, { "epoch": 1.5132668819317703, "grad_norm": 0.5005574822425842, "learning_rate": 5.791974805699415e-06, "loss": 0.5079, "step": 3593 }, { "epoch": 1.5136880527867471, "grad_norm": 0.42799893021583557, "learning_rate": 5.789554728853476e-06, "loss": 0.4637, "step": 3594 }, { "epoch": 1.514109223641724, "grad_norm": 0.43452686071395874, "learning_rate": 5.7871344622926405e-06, "loss": 0.4106, "step": 3595 }, { "epoch": 1.5145303944967008, "grad_norm": 0.4024966061115265, "learning_rate": 5.784714006598454e-06, "loss": 0.4219, "step": 3596 }, { "epoch": 1.5149515653516776, "grad_norm": 0.3889031708240509, "learning_rate": 5.7822933623525046e-06, "loss": 0.3867, "step": 3597 }, { "epoch": 1.5153727362066545, "grad_norm": 0.42593032121658325, "learning_rate": 5.779872530136424e-06, "loss": 0.4637, "step": 3598 }, { "epoch": 1.5157939070616313, "grad_norm": 0.40879586338996887, "learning_rate": 5.777451510531895e-06, "loss": 0.428, "step": 3599 }, { "epoch": 1.5162150779166081, "grad_norm": 0.4401489198207855, "learning_rate": 5.775030304120643e-06, "loss": 0.3955, "step": 3600 }, { "epoch": 1.516636248771585, "grad_norm": 0.4640437066555023, "learning_rate": 5.772608911484436e-06, "loss": 0.4766, "step": 3601 }, { "epoch": 1.5170574196265618, "grad_norm": 0.4594890773296356, "learning_rate": 5.770187333205089e-06, "loss": 0.4702, "step": 3602 }, { "epoch": 1.5174785904815387, "grad_norm": 0.3858107030391693, "learning_rate": 5.767765569864459e-06, "loss": 0.406, "step": 3603 }, { "epoch": 1.5178997613365155, "grad_norm": 0.43838784098625183, "learning_rate": 5.76534362204445e-06, "loss": 0.4067, "step": 3604 }, { "epoch": 1.5183209321914923, "grad_norm": 0.5056606531143188, "learning_rate": 5.762921490327013e-06, "loss": 0.4904, "step": 3605 }, { "epoch": 1.5187421030464692, "grad_norm": 0.41794484853744507, "learning_rate": 5.7604991752941375e-06, "loss": 0.4198, "step": 3606 }, { "epoch": 1.519163273901446, "grad_norm": 0.4696882665157318, "learning_rate": 5.758076677527857e-06, "loss": 0.5144, "step": 3607 }, { "epoch": 1.5195844447564228, "grad_norm": 0.39819368720054626, "learning_rate": 5.755653997610256e-06, "loss": 0.3989, "step": 3608 }, { "epoch": 1.5200056156113997, "grad_norm": 0.40147486329078674, "learning_rate": 5.753231136123455e-06, "loss": 0.4482, "step": 3609 }, { "epoch": 1.5204267864663765, "grad_norm": 0.42953383922576904, "learning_rate": 5.750808093649622e-06, "loss": 0.441, "step": 3610 }, { "epoch": 1.5208479573213534, "grad_norm": 0.46583202481269836, "learning_rate": 5.748384870770969e-06, "loss": 0.4398, "step": 3611 }, { "epoch": 1.5212691281763302, "grad_norm": 0.5011401772499084, "learning_rate": 5.74596146806975e-06, "loss": 0.484, "step": 3612 }, { "epoch": 1.521690299031307, "grad_norm": 0.471087783575058, "learning_rate": 5.743537886128258e-06, "loss": 0.5098, "step": 3613 }, { "epoch": 1.5221114698862839, "grad_norm": 0.444746732711792, "learning_rate": 5.741114125528839e-06, "loss": 0.4253, "step": 3614 }, { "epoch": 1.5225326407412607, "grad_norm": 0.42459121346473694, "learning_rate": 5.738690186853875e-06, "loss": 0.4152, "step": 3615 }, { "epoch": 1.5229538115962375, "grad_norm": 0.4510626196861267, "learning_rate": 5.73626607068579e-06, "loss": 0.529, "step": 3616 }, { "epoch": 1.5233749824512144, "grad_norm": 0.40652787685394287, "learning_rate": 5.733841777607054e-06, "loss": 0.3533, "step": 3617 }, { "epoch": 1.5237961533061912, "grad_norm": 0.4810006320476532, "learning_rate": 5.731417308200176e-06, "loss": 0.4759, "step": 3618 }, { "epoch": 1.524217324161168, "grad_norm": 0.44330480694770813, "learning_rate": 5.728992663047714e-06, "loss": 0.4685, "step": 3619 }, { "epoch": 1.524638495016145, "grad_norm": 0.4318309426307678, "learning_rate": 5.726567842732262e-06, "loss": 0.4213, "step": 3620 }, { "epoch": 1.5250596658711217, "grad_norm": 0.4366401135921478, "learning_rate": 5.724142847836457e-06, "loss": 0.4134, "step": 3621 }, { "epoch": 1.5254808367260986, "grad_norm": 0.43672075867652893, "learning_rate": 5.7217176789429795e-06, "loss": 0.5138, "step": 3622 }, { "epoch": 1.5259020075810754, "grad_norm": 0.38922667503356934, "learning_rate": 5.719292336634551e-06, "loss": 0.4241, "step": 3623 }, { "epoch": 1.5263231784360523, "grad_norm": 0.4297131896018982, "learning_rate": 5.716866821493934e-06, "loss": 0.4615, "step": 3624 }, { "epoch": 1.526744349291029, "grad_norm": 0.45432063937187195, "learning_rate": 5.714441134103936e-06, "loss": 0.5051, "step": 3625 }, { "epoch": 1.527165520146006, "grad_norm": 0.3835216760635376, "learning_rate": 5.7120152750474e-06, "loss": 0.3906, "step": 3626 }, { "epoch": 1.5275866910009828, "grad_norm": 0.4378139078617096, "learning_rate": 5.709589244907216e-06, "loss": 0.469, "step": 3627 }, { "epoch": 1.5280078618559596, "grad_norm": 0.4435618817806244, "learning_rate": 5.707163044266313e-06, "loss": 0.4154, "step": 3628 }, { "epoch": 1.5284290327109364, "grad_norm": 0.4473835527896881, "learning_rate": 5.704736673707658e-06, "loss": 0.5174, "step": 3629 }, { "epoch": 1.5288502035659133, "grad_norm": 0.41914328932762146, "learning_rate": 5.702310133814262e-06, "loss": 0.4331, "step": 3630 }, { "epoch": 1.5292713744208901, "grad_norm": 0.45431119203567505, "learning_rate": 5.699883425169177e-06, "loss": 0.4275, "step": 3631 }, { "epoch": 1.529692545275867, "grad_norm": 0.43311312794685364, "learning_rate": 5.697456548355497e-06, "loss": 0.465, "step": 3632 }, { "epoch": 1.5301137161308438, "grad_norm": 0.385237455368042, "learning_rate": 5.69502950395635e-06, "loss": 0.3757, "step": 3633 }, { "epoch": 1.5305348869858206, "grad_norm": 0.515261709690094, "learning_rate": 5.69260229255491e-06, "loss": 0.4891, "step": 3634 }, { "epoch": 1.5309560578407975, "grad_norm": 0.40914610028266907, "learning_rate": 5.690174914734389e-06, "loss": 0.3595, "step": 3635 }, { "epoch": 1.5313772286957743, "grad_norm": 0.4283192753791809, "learning_rate": 5.6877473710780395e-06, "loss": 0.4736, "step": 3636 }, { "epoch": 1.531798399550751, "grad_norm": 0.45060357451438904, "learning_rate": 5.685319662169157e-06, "loss": 0.4782, "step": 3637 }, { "epoch": 1.532219570405728, "grad_norm": 0.4063017964363098, "learning_rate": 5.682891788591066e-06, "loss": 0.4365, "step": 3638 }, { "epoch": 1.5326407412607046, "grad_norm": 0.44601765275001526, "learning_rate": 5.680463750927146e-06, "loss": 0.4784, "step": 3639 }, { "epoch": 1.5330619121156817, "grad_norm": 0.4449863135814667, "learning_rate": 5.678035549760806e-06, "loss": 0.4185, "step": 3640 }, { "epoch": 1.5334830829706583, "grad_norm": 0.4012129008769989, "learning_rate": 5.6756071856754915e-06, "loss": 0.4228, "step": 3641 }, { "epoch": 1.5339042538256353, "grad_norm": 0.3844505250453949, "learning_rate": 5.673178659254698e-06, "loss": 0.4732, "step": 3642 }, { "epoch": 1.534325424680612, "grad_norm": 0.3791271150112152, "learning_rate": 5.67074997108195e-06, "loss": 0.3883, "step": 3643 }, { "epoch": 1.534746595535589, "grad_norm": 0.44495028257369995, "learning_rate": 5.668321121740817e-06, "loss": 0.4066, "step": 3644 }, { "epoch": 1.5351677663905656, "grad_norm": 0.4915428161621094, "learning_rate": 5.665892111814904e-06, "loss": 0.5008, "step": 3645 }, { "epoch": 1.5355889372455427, "grad_norm": 0.4587821662425995, "learning_rate": 5.663462941887855e-06, "loss": 0.4544, "step": 3646 }, { "epoch": 1.5360101081005193, "grad_norm": 0.4513724446296692, "learning_rate": 5.6610336125433564e-06, "loss": 0.4239, "step": 3647 }, { "epoch": 1.5364312789554964, "grad_norm": 0.44473913311958313, "learning_rate": 5.658604124365126e-06, "loss": 0.469, "step": 3648 }, { "epoch": 1.536852449810473, "grad_norm": 0.37063074111938477, "learning_rate": 5.656174477936923e-06, "loss": 0.3769, "step": 3649 }, { "epoch": 1.53727362066545, "grad_norm": 0.45096659660339355, "learning_rate": 5.653744673842547e-06, "loss": 0.5261, "step": 3650 }, { "epoch": 1.5376947915204267, "grad_norm": 0.471319317817688, "learning_rate": 5.651314712665833e-06, "loss": 0.4564, "step": 3651 }, { "epoch": 1.5381159623754037, "grad_norm": 0.47240200638771057, "learning_rate": 5.648884594990656e-06, "loss": 0.4961, "step": 3652 }, { "epoch": 1.5385371332303803, "grad_norm": 0.43917375802993774, "learning_rate": 5.6464543214009226e-06, "loss": 0.4492, "step": 3653 }, { "epoch": 1.5389583040853574, "grad_norm": 0.41511210799217224, "learning_rate": 5.644023892480583e-06, "loss": 0.4426, "step": 3654 }, { "epoch": 1.539379474940334, "grad_norm": 0.41949546337127686, "learning_rate": 5.641593308813624e-06, "loss": 0.3901, "step": 3655 }, { "epoch": 1.539800645795311, "grad_norm": 0.4903242290019989, "learning_rate": 5.639162570984067e-06, "loss": 0.4995, "step": 3656 }, { "epoch": 1.5402218166502877, "grad_norm": 0.4054003655910492, "learning_rate": 5.636731679575973e-06, "loss": 0.4243, "step": 3657 }, { "epoch": 1.5406429875052647, "grad_norm": 0.4625818133354187, "learning_rate": 5.634300635173435e-06, "loss": 0.4595, "step": 3658 }, { "epoch": 1.5410641583602414, "grad_norm": 0.40925973653793335, "learning_rate": 5.6318694383605935e-06, "loss": 0.4378, "step": 3659 }, { "epoch": 1.5414853292152184, "grad_norm": 0.4383421540260315, "learning_rate": 5.629438089721611e-06, "loss": 0.4385, "step": 3660 }, { "epoch": 1.541906500070195, "grad_norm": 0.46550998091697693, "learning_rate": 5.6270065898406986e-06, "loss": 0.4916, "step": 3661 }, { "epoch": 1.542327670925172, "grad_norm": 0.4524315595626831, "learning_rate": 5.624574939302099e-06, "loss": 0.4781, "step": 3662 }, { "epoch": 1.5427488417801487, "grad_norm": 0.515625536441803, "learning_rate": 5.622143138690088e-06, "loss": 0.4224, "step": 3663 }, { "epoch": 1.5431700126351258, "grad_norm": 0.4251876473426819, "learning_rate": 5.619711188588986e-06, "loss": 0.4442, "step": 3664 }, { "epoch": 1.5435911834901024, "grad_norm": 0.45560574531555176, "learning_rate": 5.61727908958314e-06, "loss": 0.5048, "step": 3665 }, { "epoch": 1.5440123543450794, "grad_norm": 0.41606661677360535, "learning_rate": 5.614846842256937e-06, "loss": 0.3837, "step": 3666 }, { "epoch": 1.544433525200056, "grad_norm": 0.4031437337398529, "learning_rate": 5.612414447194803e-06, "loss": 0.4356, "step": 3667 }, { "epoch": 1.5448546960550331, "grad_norm": 0.4538353681564331, "learning_rate": 5.609981904981191e-06, "loss": 0.4315, "step": 3668 }, { "epoch": 1.5452758669100097, "grad_norm": 0.4146525263786316, "learning_rate": 5.6075492162005985e-06, "loss": 0.4188, "step": 3669 }, { "epoch": 1.5456970377649868, "grad_norm": 0.43339288234710693, "learning_rate": 5.605116381437552e-06, "loss": 0.4746, "step": 3670 }, { "epoch": 1.5461182086199634, "grad_norm": 0.4011060893535614, "learning_rate": 5.6026834012766155e-06, "loss": 0.4126, "step": 3671 }, { "epoch": 1.5465393794749405, "grad_norm": 0.457621306180954, "learning_rate": 5.600250276302389e-06, "loss": 0.46, "step": 3672 }, { "epoch": 1.546960550329917, "grad_norm": 0.433769553899765, "learning_rate": 5.597817007099503e-06, "loss": 0.4085, "step": 3673 }, { "epoch": 1.5473817211848941, "grad_norm": 0.479857474565506, "learning_rate": 5.595383594252628e-06, "loss": 0.4696, "step": 3674 }, { "epoch": 1.5478028920398708, "grad_norm": 0.4535594880580902, "learning_rate": 5.592950038346465e-06, "loss": 0.4638, "step": 3675 }, { "epoch": 1.5482240628948478, "grad_norm": 0.4065956771373749, "learning_rate": 5.590516339965752e-06, "loss": 0.4061, "step": 3676 }, { "epoch": 1.5486452337498244, "grad_norm": 0.4687206745147705, "learning_rate": 5.588082499695261e-06, "loss": 0.3897, "step": 3677 }, { "epoch": 1.5490664046048015, "grad_norm": 0.43084269762039185, "learning_rate": 5.585648518119795e-06, "loss": 0.4632, "step": 3678 }, { "epoch": 1.549487575459778, "grad_norm": 0.42929813265800476, "learning_rate": 5.583214395824194e-06, "loss": 0.4552, "step": 3679 }, { "epoch": 1.5499087463147552, "grad_norm": 0.3924783170223236, "learning_rate": 5.580780133393332e-06, "loss": 0.4157, "step": 3680 }, { "epoch": 1.5503299171697318, "grad_norm": 0.44448503851890564, "learning_rate": 5.578345731412115e-06, "loss": 0.4254, "step": 3681 }, { "epoch": 1.5507510880247088, "grad_norm": 0.46340611577033997, "learning_rate": 5.575911190465483e-06, "loss": 0.4707, "step": 3682 }, { "epoch": 1.5511722588796855, "grad_norm": 0.5403115153312683, "learning_rate": 5.5734765111384095e-06, "loss": 0.4702, "step": 3683 }, { "epoch": 1.5515934297346625, "grad_norm": 0.4712477922439575, "learning_rate": 5.5710416940159026e-06, "loss": 0.4431, "step": 3684 }, { "epoch": 1.5520146005896391, "grad_norm": 0.4421420991420746, "learning_rate": 5.5686067396829994e-06, "loss": 0.4274, "step": 3685 }, { "epoch": 1.5524357714446162, "grad_norm": 0.4235234260559082, "learning_rate": 5.566171648724776e-06, "loss": 0.3897, "step": 3686 }, { "epoch": 1.5528569422995928, "grad_norm": 0.4290897250175476, "learning_rate": 5.563736421726338e-06, "loss": 0.4103, "step": 3687 }, { "epoch": 1.5532781131545697, "grad_norm": 0.42641568183898926, "learning_rate": 5.561301059272821e-06, "loss": 0.4324, "step": 3688 }, { "epoch": 1.5536992840095465, "grad_norm": 0.4607505202293396, "learning_rate": 5.5588655619494e-06, "loss": 0.4981, "step": 3689 }, { "epoch": 1.5541204548645233, "grad_norm": 0.4145461320877075, "learning_rate": 5.556429930341274e-06, "loss": 0.417, "step": 3690 }, { "epoch": 1.5545416257195002, "grad_norm": 0.4645404517650604, "learning_rate": 5.553994165033683e-06, "loss": 0.4823, "step": 3691 }, { "epoch": 1.554962796574477, "grad_norm": 0.3922984302043915, "learning_rate": 5.55155826661189e-06, "loss": 0.3895, "step": 3692 }, { "epoch": 1.5553839674294538, "grad_norm": 0.4507134258747101, "learning_rate": 5.549122235661198e-06, "loss": 0.4522, "step": 3693 }, { "epoch": 1.5558051382844307, "grad_norm": 0.44495415687561035, "learning_rate": 5.546686072766941e-06, "loss": 0.4598, "step": 3694 }, { "epoch": 1.5562263091394075, "grad_norm": 0.45359349250793457, "learning_rate": 5.544249778514478e-06, "loss": 0.4838, "step": 3695 }, { "epoch": 1.5566474799943844, "grad_norm": 0.39161375164985657, "learning_rate": 5.541813353489206e-06, "loss": 0.4102, "step": 3696 }, { "epoch": 1.5570686508493612, "grad_norm": 0.4423791468143463, "learning_rate": 5.539376798276549e-06, "loss": 0.442, "step": 3697 }, { "epoch": 1.557489821704338, "grad_norm": 0.44728371500968933, "learning_rate": 5.536940113461966e-06, "loss": 0.4465, "step": 3698 }, { "epoch": 1.5579109925593149, "grad_norm": 0.49021944403648376, "learning_rate": 5.534503299630949e-06, "loss": 0.4545, "step": 3699 }, { "epoch": 1.5583321634142917, "grad_norm": 0.43544742465019226, "learning_rate": 5.532066357369012e-06, "loss": 0.4438, "step": 3700 }, { "epoch": 1.5587533342692685, "grad_norm": 0.3680735230445862, "learning_rate": 5.52962928726171e-06, "loss": 0.4027, "step": 3701 }, { "epoch": 1.5591745051242454, "grad_norm": 0.4296952188014984, "learning_rate": 5.527192089894622e-06, "loss": 0.4393, "step": 3702 }, { "epoch": 1.5595956759792222, "grad_norm": 0.4497131407260895, "learning_rate": 5.5247547658533604e-06, "loss": 0.4431, "step": 3703 }, { "epoch": 1.560016846834199, "grad_norm": 0.4116608798503876, "learning_rate": 5.52231731572357e-06, "loss": 0.4679, "step": 3704 }, { "epoch": 1.560438017689176, "grad_norm": 0.4627755880355835, "learning_rate": 5.519879740090918e-06, "loss": 0.4521, "step": 3705 }, { "epoch": 1.5608591885441527, "grad_norm": 0.48558756709098816, "learning_rate": 5.517442039541112e-06, "loss": 0.5074, "step": 3706 }, { "epoch": 1.5612803593991296, "grad_norm": 0.4845706522464752, "learning_rate": 5.5150042146598835e-06, "loss": 0.4548, "step": 3707 }, { "epoch": 1.5617015302541064, "grad_norm": 0.46849435567855835, "learning_rate": 5.512566266032995e-06, "loss": 0.4776, "step": 3708 }, { "epoch": 1.5621227011090832, "grad_norm": 0.4529879689216614, "learning_rate": 5.510128194246237e-06, "loss": 0.4304, "step": 3709 }, { "epoch": 1.56254387196406, "grad_norm": 0.44274038076400757, "learning_rate": 5.507689999885435e-06, "loss": 0.4375, "step": 3710 }, { "epoch": 1.562965042819037, "grad_norm": 0.4220748245716095, "learning_rate": 5.505251683536439e-06, "loss": 0.4224, "step": 3711 }, { "epoch": 1.5633862136740138, "grad_norm": 0.41866010427474976, "learning_rate": 5.502813245785128e-06, "loss": 0.4682, "step": 3712 }, { "epoch": 1.5638073845289906, "grad_norm": 0.44591984152793884, "learning_rate": 5.500374687217413e-06, "loss": 0.45, "step": 3713 }, { "epoch": 1.5642285553839674, "grad_norm": 0.4542437791824341, "learning_rate": 5.497936008419237e-06, "loss": 0.4452, "step": 3714 }, { "epoch": 1.5646497262389443, "grad_norm": 0.39506766200065613, "learning_rate": 5.495497209976562e-06, "loss": 0.3863, "step": 3715 }, { "epoch": 1.565070897093921, "grad_norm": 0.44179442524909973, "learning_rate": 5.493058292475387e-06, "loss": 0.4377, "step": 3716 }, { "epoch": 1.565492067948898, "grad_norm": 0.4595080018043518, "learning_rate": 5.490619256501736e-06, "loss": 0.451, "step": 3717 }, { "epoch": 1.5659132388038748, "grad_norm": 0.44693660736083984, "learning_rate": 5.488180102641665e-06, "loss": 0.4443, "step": 3718 }, { "epoch": 1.5663344096588516, "grad_norm": 0.46886372566223145, "learning_rate": 5.4857408314812556e-06, "loss": 0.4734, "step": 3719 }, { "epoch": 1.5667555805138285, "grad_norm": 0.42704465985298157, "learning_rate": 5.483301443606615e-06, "loss": 0.4084, "step": 3720 }, { "epoch": 1.5671767513688053, "grad_norm": 0.4459835886955261, "learning_rate": 5.480861939603885e-06, "loss": 0.4217, "step": 3721 }, { "epoch": 1.5675979222237821, "grad_norm": 0.3927259147167206, "learning_rate": 5.478422320059231e-06, "loss": 0.3774, "step": 3722 }, { "epoch": 1.568019093078759, "grad_norm": 0.4994848072528839, "learning_rate": 5.475982585558846e-06, "loss": 0.4861, "step": 3723 }, { "epoch": 1.5684402639337358, "grad_norm": 0.46083059906959534, "learning_rate": 5.473542736688952e-06, "loss": 0.4581, "step": 3724 }, { "epoch": 1.5688614347887126, "grad_norm": 0.4098131060600281, "learning_rate": 5.471102774035796e-06, "loss": 0.4106, "step": 3725 }, { "epoch": 1.5692826056436895, "grad_norm": 0.44747164845466614, "learning_rate": 5.4686626981856584e-06, "loss": 0.4353, "step": 3726 }, { "epoch": 1.5697037764986663, "grad_norm": 0.3869614899158478, "learning_rate": 5.46622250972484e-06, "loss": 0.4257, "step": 3727 }, { "epoch": 1.5701249473536432, "grad_norm": 0.43138182163238525, "learning_rate": 5.463782209239671e-06, "loss": 0.47, "step": 3728 }, { "epoch": 1.57054611820862, "grad_norm": 0.39205437898635864, "learning_rate": 5.46134179731651e-06, "loss": 0.3835, "step": 3729 }, { "epoch": 1.5709672890635968, "grad_norm": 0.4127623736858368, "learning_rate": 5.458901274541742e-06, "loss": 0.4363, "step": 3730 }, { "epoch": 1.5713884599185737, "grad_norm": 0.39237457513809204, "learning_rate": 5.456460641501777e-06, "loss": 0.4654, "step": 3731 }, { "epoch": 1.5718096307735505, "grad_norm": 0.4020434319972992, "learning_rate": 5.454019898783053e-06, "loss": 0.4872, "step": 3732 }, { "epoch": 1.5722308016285274, "grad_norm": 0.41389337182044983, "learning_rate": 5.4515790469720305e-06, "loss": 0.4467, "step": 3733 }, { "epoch": 1.5726519724835042, "grad_norm": 0.4216387867927551, "learning_rate": 5.449138086655206e-06, "loss": 0.4883, "step": 3734 }, { "epoch": 1.573073143338481, "grad_norm": 0.44981861114501953, "learning_rate": 5.446697018419089e-06, "loss": 0.4183, "step": 3735 }, { "epoch": 1.5734943141934579, "grad_norm": 0.42689380049705505, "learning_rate": 5.444255842850226e-06, "loss": 0.4854, "step": 3736 }, { "epoch": 1.5739154850484347, "grad_norm": 0.41390299797058105, "learning_rate": 5.441814560535181e-06, "loss": 0.4363, "step": 3737 }, { "epoch": 1.5743366559034113, "grad_norm": 0.4577016830444336, "learning_rate": 5.439373172060552e-06, "loss": 0.4693, "step": 3738 }, { "epoch": 1.5747578267583884, "grad_norm": 0.38876160979270935, "learning_rate": 5.436931678012956e-06, "loss": 0.4149, "step": 3739 }, { "epoch": 1.575178997613365, "grad_norm": 0.4328693449497223, "learning_rate": 5.434490078979035e-06, "loss": 0.443, "step": 3740 }, { "epoch": 1.575600168468342, "grad_norm": 0.40435436367988586, "learning_rate": 5.43204837554546e-06, "loss": 0.4392, "step": 3741 }, { "epoch": 1.5760213393233187, "grad_norm": 0.4119214117527008, "learning_rate": 5.429606568298926e-06, "loss": 0.4064, "step": 3742 }, { "epoch": 1.5764425101782957, "grad_norm": 0.44555509090423584, "learning_rate": 5.427164657826155e-06, "loss": 0.432, "step": 3743 }, { "epoch": 1.5768636810332723, "grad_norm": 0.41118767857551575, "learning_rate": 5.424722644713886e-06, "loss": 0.4355, "step": 3744 }, { "epoch": 1.5772848518882494, "grad_norm": 0.42486459016799927, "learning_rate": 5.422280529548892e-06, "loss": 0.4451, "step": 3745 }, { "epoch": 1.577706022743226, "grad_norm": 0.4405380189418793, "learning_rate": 5.419838312917966e-06, "loss": 0.411, "step": 3746 }, { "epoch": 1.578127193598203, "grad_norm": 0.41839268803596497, "learning_rate": 5.417395995407926e-06, "loss": 0.4179, "step": 3747 }, { "epoch": 1.5785483644531797, "grad_norm": 0.4427437484264374, "learning_rate": 5.414953577605612e-06, "loss": 0.4483, "step": 3748 }, { "epoch": 1.5789695353081568, "grad_norm": 0.4351113438606262, "learning_rate": 5.412511060097893e-06, "loss": 0.469, "step": 3749 }, { "epoch": 1.5793907061631334, "grad_norm": 0.46721798181533813, "learning_rate": 5.410068443471657e-06, "loss": 0.4829, "step": 3750 }, { "epoch": 1.5798118770181104, "grad_norm": 0.43630003929138184, "learning_rate": 5.40762572831382e-06, "loss": 0.4463, "step": 3751 }, { "epoch": 1.580233047873087, "grad_norm": 0.4243256151676178, "learning_rate": 5.405182915211318e-06, "loss": 0.4754, "step": 3752 }, { "epoch": 1.580654218728064, "grad_norm": 0.4554750621318817, "learning_rate": 5.402740004751115e-06, "loss": 0.4346, "step": 3753 }, { "epoch": 1.5810753895830407, "grad_norm": 0.39888066053390503, "learning_rate": 5.400296997520192e-06, "loss": 0.4091, "step": 3754 }, { "epoch": 1.5814965604380178, "grad_norm": 0.4421876072883606, "learning_rate": 5.397853894105559e-06, "loss": 0.4543, "step": 3755 }, { "epoch": 1.5819177312929944, "grad_norm": 0.4079934358596802, "learning_rate": 5.395410695094246e-06, "loss": 0.4215, "step": 3756 }, { "epoch": 1.5823389021479715, "grad_norm": 0.40257564187049866, "learning_rate": 5.392967401073309e-06, "loss": 0.4172, "step": 3757 }, { "epoch": 1.582760073002948, "grad_norm": 0.4292786717414856, "learning_rate": 5.390524012629824e-06, "loss": 0.4184, "step": 3758 }, { "epoch": 1.5831812438579251, "grad_norm": 0.44192489981651306, "learning_rate": 5.388080530350889e-06, "loss": 0.4232, "step": 3759 }, { "epoch": 1.5836024147129018, "grad_norm": 0.4537838399410248, "learning_rate": 5.385636954823627e-06, "loss": 0.4614, "step": 3760 }, { "epoch": 1.5840235855678788, "grad_norm": 0.4275575280189514, "learning_rate": 5.383193286635182e-06, "loss": 0.4411, "step": 3761 }, { "epoch": 1.5844447564228554, "grad_norm": 0.42281004786491394, "learning_rate": 5.380749526372722e-06, "loss": 0.4599, "step": 3762 }, { "epoch": 1.5848659272778325, "grad_norm": 0.4276893734931946, "learning_rate": 5.378305674623436e-06, "loss": 0.4386, "step": 3763 }, { "epoch": 1.585287098132809, "grad_norm": 0.4534818232059479, "learning_rate": 5.375861731974531e-06, "loss": 0.438, "step": 3764 }, { "epoch": 1.5857082689877862, "grad_norm": 0.4055396318435669, "learning_rate": 5.373417699013243e-06, "loss": 0.4054, "step": 3765 }, { "epoch": 1.5861294398427628, "grad_norm": 0.47671782970428467, "learning_rate": 5.370973576326829e-06, "loss": 0.5357, "step": 3766 }, { "epoch": 1.5865506106977398, "grad_norm": 0.44448333978652954, "learning_rate": 5.36852936450256e-06, "loss": 0.4314, "step": 3767 }, { "epoch": 1.5869717815527165, "grad_norm": 0.45912015438079834, "learning_rate": 5.366085064127734e-06, "loss": 0.4498, "step": 3768 }, { "epoch": 1.5873929524076935, "grad_norm": 0.4713861048221588, "learning_rate": 5.363640675789671e-06, "loss": 0.4812, "step": 3769 }, { "epoch": 1.5878141232626701, "grad_norm": 0.4391571283340454, "learning_rate": 5.361196200075711e-06, "loss": 0.3868, "step": 3770 }, { "epoch": 1.5882352941176472, "grad_norm": 0.49081727862358093, "learning_rate": 5.358751637573215e-06, "loss": 0.4345, "step": 3771 }, { "epoch": 1.5886564649726238, "grad_norm": 0.4125845730304718, "learning_rate": 5.356306988869562e-06, "loss": 0.4321, "step": 3772 }, { "epoch": 1.5890776358276009, "grad_norm": 0.4380047023296356, "learning_rate": 5.353862254552159e-06, "loss": 0.4195, "step": 3773 }, { "epoch": 1.5894988066825775, "grad_norm": 0.4219145178794861, "learning_rate": 5.351417435208423e-06, "loss": 0.4166, "step": 3774 }, { "epoch": 1.5899199775375545, "grad_norm": 0.46992072463035583, "learning_rate": 5.348972531425802e-06, "loss": 0.5079, "step": 3775 }, { "epoch": 1.5903411483925312, "grad_norm": 0.4151760935783386, "learning_rate": 5.346527543791758e-06, "loss": 0.42, "step": 3776 }, { "epoch": 1.5907623192475082, "grad_norm": 0.514630913734436, "learning_rate": 5.344082472893775e-06, "loss": 0.5194, "step": 3777 }, { "epoch": 1.5911834901024848, "grad_norm": 0.4949706196784973, "learning_rate": 5.341637319319356e-06, "loss": 0.4218, "step": 3778 }, { "epoch": 1.591604660957462, "grad_norm": 0.4212114214897156, "learning_rate": 5.339192083656025e-06, "loss": 0.4297, "step": 3779 }, { "epoch": 1.5920258318124385, "grad_norm": 0.4628548324108124, "learning_rate": 5.3367467664913245e-06, "loss": 0.4535, "step": 3780 }, { "epoch": 1.5924470026674156, "grad_norm": 0.39986652135849, "learning_rate": 5.334301368412821e-06, "loss": 0.458, "step": 3781 }, { "epoch": 1.5928681735223922, "grad_norm": 0.38669899106025696, "learning_rate": 5.331855890008092e-06, "loss": 0.4279, "step": 3782 }, { "epoch": 1.5932893443773692, "grad_norm": 0.423390656709671, "learning_rate": 5.329410331864743e-06, "loss": 0.4038, "step": 3783 }, { "epoch": 1.5937105152323459, "grad_norm": 0.38919228315353394, "learning_rate": 5.326964694570391e-06, "loss": 0.3983, "step": 3784 }, { "epoch": 1.594131686087323, "grad_norm": 0.4484677016735077, "learning_rate": 5.32451897871268e-06, "loss": 0.4811, "step": 3785 }, { "epoch": 1.5945528569422995, "grad_norm": 0.41881367564201355, "learning_rate": 5.322073184879267e-06, "loss": 0.4191, "step": 3786 }, { "epoch": 1.5949740277972766, "grad_norm": 0.4624983072280884, "learning_rate": 5.319627313657829e-06, "loss": 0.4628, "step": 3787 }, { "epoch": 1.5953951986522532, "grad_norm": 0.43945467472076416, "learning_rate": 5.317181365636061e-06, "loss": 0.4659, "step": 3788 }, { "epoch": 1.59581636950723, "grad_norm": 0.4001675546169281, "learning_rate": 5.314735341401681e-06, "loss": 0.443, "step": 3789 }, { "epoch": 1.5962375403622069, "grad_norm": 0.39841392636299133, "learning_rate": 5.31228924154242e-06, "loss": 0.4188, "step": 3790 }, { "epoch": 1.5966587112171837, "grad_norm": 0.4898279905319214, "learning_rate": 5.309843066646027e-06, "loss": 0.5182, "step": 3791 }, { "epoch": 1.5970798820721606, "grad_norm": 0.3846484422683716, "learning_rate": 5.307396817300272e-06, "loss": 0.4167, "step": 3792 }, { "epoch": 1.5975010529271374, "grad_norm": 0.3790142238140106, "learning_rate": 5.304950494092945e-06, "loss": 0.4154, "step": 3793 }, { "epoch": 1.5979222237821142, "grad_norm": 0.4081941545009613, "learning_rate": 5.302504097611847e-06, "loss": 0.4796, "step": 3794 }, { "epoch": 1.598343394637091, "grad_norm": 0.40415966510772705, "learning_rate": 5.300057628444801e-06, "loss": 0.4613, "step": 3795 }, { "epoch": 1.598764565492068, "grad_norm": 0.4000833034515381, "learning_rate": 5.29761108717965e-06, "loss": 0.4408, "step": 3796 }, { "epoch": 1.5991857363470448, "grad_norm": 0.4128383696079254, "learning_rate": 5.295164474404245e-06, "loss": 0.467, "step": 3797 }, { "epoch": 1.5996069072020216, "grad_norm": 0.3986220359802246, "learning_rate": 5.292717790706467e-06, "loss": 0.4414, "step": 3798 }, { "epoch": 1.6000280780569984, "grad_norm": 0.3838790953159332, "learning_rate": 5.290271036674201e-06, "loss": 0.4206, "step": 3799 }, { "epoch": 1.6004492489119753, "grad_norm": 0.4394805431365967, "learning_rate": 5.28782421289536e-06, "loss": 0.4679, "step": 3800 }, { "epoch": 1.600870419766952, "grad_norm": 0.458963006734848, "learning_rate": 5.285377319957866e-06, "loss": 0.4709, "step": 3801 }, { "epoch": 1.601291590621929, "grad_norm": 0.4487631022930145, "learning_rate": 5.282930358449663e-06, "loss": 0.4298, "step": 3802 }, { "epoch": 1.6017127614769058, "grad_norm": 0.4141668379306793, "learning_rate": 5.280483328958707e-06, "loss": 0.4284, "step": 3803 }, { "epoch": 1.6021339323318826, "grad_norm": 0.3963787853717804, "learning_rate": 5.278036232072971e-06, "loss": 0.4142, "step": 3804 }, { "epoch": 1.6025551031868595, "grad_norm": 0.43061962723731995, "learning_rate": 5.275589068380451e-06, "loss": 0.4209, "step": 3805 }, { "epoch": 1.6029762740418363, "grad_norm": 0.44785958528518677, "learning_rate": 5.2731418384691494e-06, "loss": 0.4651, "step": 3806 }, { "epoch": 1.6033974448968131, "grad_norm": 0.42917659878730774, "learning_rate": 5.270694542927089e-06, "loss": 0.4369, "step": 3807 }, { "epoch": 1.60381861575179, "grad_norm": 0.427166223526001, "learning_rate": 5.268247182342307e-06, "loss": 0.4635, "step": 3808 }, { "epoch": 1.6042397866067668, "grad_norm": 0.4004139006137848, "learning_rate": 5.26579975730286e-06, "loss": 0.4547, "step": 3809 }, { "epoch": 1.6046609574617436, "grad_norm": 0.4148801565170288, "learning_rate": 5.263352268396818e-06, "loss": 0.4383, "step": 3810 }, { "epoch": 1.6050821283167205, "grad_norm": 0.4726579785346985, "learning_rate": 5.2609047162122636e-06, "loss": 0.5203, "step": 3811 }, { "epoch": 1.6055032991716973, "grad_norm": 0.4473476707935333, "learning_rate": 5.258457101337297e-06, "loss": 0.4394, "step": 3812 }, { "epoch": 1.6059244700266742, "grad_norm": 0.3768884539604187, "learning_rate": 5.2560094243600335e-06, "loss": 0.437, "step": 3813 }, { "epoch": 1.606345640881651, "grad_norm": 0.43403780460357666, "learning_rate": 5.2535616858686024e-06, "loss": 0.4258, "step": 3814 }, { "epoch": 1.6067668117366278, "grad_norm": 0.44044002890586853, "learning_rate": 5.25111388645115e-06, "loss": 0.4761, "step": 3815 }, { "epoch": 1.6071879825916047, "grad_norm": 0.364793986082077, "learning_rate": 5.248666026695835e-06, "loss": 0.4301, "step": 3816 }, { "epoch": 1.6076091534465815, "grad_norm": 0.44301676750183105, "learning_rate": 5.246218107190829e-06, "loss": 0.4837, "step": 3817 }, { "epoch": 1.6080303243015583, "grad_norm": 0.4280470907688141, "learning_rate": 5.243770128524325e-06, "loss": 0.4206, "step": 3818 }, { "epoch": 1.6084514951565352, "grad_norm": 0.4442239999771118, "learning_rate": 5.241322091284522e-06, "loss": 0.446, "step": 3819 }, { "epoch": 1.608872666011512, "grad_norm": 0.419758677482605, "learning_rate": 5.238873996059637e-06, "loss": 0.4011, "step": 3820 }, { "epoch": 1.6092938368664889, "grad_norm": 0.4904267191886902, "learning_rate": 5.2364258434379e-06, "loss": 0.4697, "step": 3821 }, { "epoch": 1.6097150077214657, "grad_norm": 0.46231311559677124, "learning_rate": 5.2339776340075566e-06, "loss": 0.4365, "step": 3822 }, { "epoch": 1.6101361785764425, "grad_norm": 0.4883180260658264, "learning_rate": 5.231529368356864e-06, "loss": 0.4225, "step": 3823 }, { "epoch": 1.6105573494314194, "grad_norm": 0.4130379259586334, "learning_rate": 5.2290810470740925e-06, "loss": 0.458, "step": 3824 }, { "epoch": 1.6109785202863962, "grad_norm": 0.4318091869354248, "learning_rate": 5.2266326707475305e-06, "loss": 0.4297, "step": 3825 }, { "epoch": 1.611399691141373, "grad_norm": 0.4788745939731598, "learning_rate": 5.224184239965472e-06, "loss": 0.4804, "step": 3826 }, { "epoch": 1.6118208619963499, "grad_norm": 0.3745500445365906, "learning_rate": 5.2217357553162284e-06, "loss": 0.3739, "step": 3827 }, { "epoch": 1.6122420328513267, "grad_norm": 0.4407658278942108, "learning_rate": 5.219287217388127e-06, "loss": 0.4474, "step": 3828 }, { "epoch": 1.6126632037063036, "grad_norm": 0.435131311416626, "learning_rate": 5.2168386267695005e-06, "loss": 0.4013, "step": 3829 }, { "epoch": 1.6130843745612804, "grad_norm": 0.517959713935852, "learning_rate": 5.214389984048703e-06, "loss": 0.5493, "step": 3830 }, { "epoch": 1.6135055454162572, "grad_norm": 0.4349512755870819, "learning_rate": 5.211941289814091e-06, "loss": 0.4054, "step": 3831 }, { "epoch": 1.613926716271234, "grad_norm": 0.4244046211242676, "learning_rate": 5.209492544654042e-06, "loss": 0.4491, "step": 3832 }, { "epoch": 1.614347887126211, "grad_norm": 0.441500186920166, "learning_rate": 5.207043749156945e-06, "loss": 0.4727, "step": 3833 }, { "epoch": 1.6147690579811877, "grad_norm": 0.4414854943752289, "learning_rate": 5.204594903911194e-06, "loss": 0.4222, "step": 3834 }, { "epoch": 1.6151902288361646, "grad_norm": 0.4183861315250397, "learning_rate": 5.202146009505203e-06, "loss": 0.4134, "step": 3835 }, { "epoch": 1.6156113996911414, "grad_norm": 0.44886186718940735, "learning_rate": 5.199697066527391e-06, "loss": 0.4601, "step": 3836 }, { "epoch": 1.6160325705461183, "grad_norm": 0.4115998446941376, "learning_rate": 5.197248075566197e-06, "loss": 0.4052, "step": 3837 }, { "epoch": 1.616453741401095, "grad_norm": 0.43890684843063354, "learning_rate": 5.194799037210063e-06, "loss": 0.4341, "step": 3838 }, { "epoch": 1.6168749122560717, "grad_norm": 0.4437463581562042, "learning_rate": 5.192349952047446e-06, "loss": 0.4943, "step": 3839 }, { "epoch": 1.6172960831110488, "grad_norm": 0.4205867648124695, "learning_rate": 5.1899008206668174e-06, "loss": 0.412, "step": 3840 }, { "epoch": 1.6177172539660254, "grad_norm": 0.4457806348800659, "learning_rate": 5.187451643656654e-06, "loss": 0.4205, "step": 3841 }, { "epoch": 1.6181384248210025, "grad_norm": 0.4963605999946594, "learning_rate": 5.185002421605447e-06, "loss": 0.4873, "step": 3842 }, { "epoch": 1.618559595675979, "grad_norm": 0.3976714015007019, "learning_rate": 5.182553155101696e-06, "loss": 0.4271, "step": 3843 }, { "epoch": 1.6189807665309561, "grad_norm": 0.47293412685394287, "learning_rate": 5.180103844733915e-06, "loss": 0.4706, "step": 3844 }, { "epoch": 1.6194019373859327, "grad_norm": 0.43096980452537537, "learning_rate": 5.177654491090627e-06, "loss": 0.4531, "step": 3845 }, { "epoch": 1.6198231082409098, "grad_norm": 0.4959094226360321, "learning_rate": 5.175205094760361e-06, "loss": 0.4629, "step": 3846 }, { "epoch": 1.6202442790958864, "grad_norm": 0.45609068870544434, "learning_rate": 5.172755656331665e-06, "loss": 0.4599, "step": 3847 }, { "epoch": 1.6206654499508635, "grad_norm": 0.42318397760391235, "learning_rate": 5.170306176393088e-06, "loss": 0.4221, "step": 3848 }, { "epoch": 1.62108662080584, "grad_norm": 0.46200069785118103, "learning_rate": 5.167856655533196e-06, "loss": 0.4572, "step": 3849 }, { "epoch": 1.6215077916608172, "grad_norm": 0.40603315830230713, "learning_rate": 5.165407094340562e-06, "loss": 0.377, "step": 3850 }, { "epoch": 1.6219289625157938, "grad_norm": 0.4458051919937134, "learning_rate": 5.162957493403766e-06, "loss": 0.4604, "step": 3851 }, { "epoch": 1.6223501333707708, "grad_norm": 0.41818007826805115, "learning_rate": 5.160507853311403e-06, "loss": 0.4519, "step": 3852 }, { "epoch": 1.6227713042257474, "grad_norm": 0.44798699021339417, "learning_rate": 5.158058174652072e-06, "loss": 0.4453, "step": 3853 }, { "epoch": 1.6231924750807245, "grad_norm": 0.5032545328140259, "learning_rate": 5.155608458014386e-06, "loss": 0.4401, "step": 3854 }, { "epoch": 1.6236136459357011, "grad_norm": 0.4901072382926941, "learning_rate": 5.153158703986966e-06, "loss": 0.4551, "step": 3855 }, { "epoch": 1.6240348167906782, "grad_norm": 0.4370547831058502, "learning_rate": 5.150708913158438e-06, "loss": 0.4236, "step": 3856 }, { "epoch": 1.6244559876456548, "grad_norm": 0.4070223867893219, "learning_rate": 5.148259086117442e-06, "loss": 0.4148, "step": 3857 }, { "epoch": 1.6248771585006319, "grad_norm": 0.45737093687057495, "learning_rate": 5.145809223452625e-06, "loss": 0.4757, "step": 3858 }, { "epoch": 1.6252983293556085, "grad_norm": 0.40683338046073914, "learning_rate": 5.143359325752638e-06, "loss": 0.449, "step": 3859 }, { "epoch": 1.6257195002105855, "grad_norm": 0.38983166217803955, "learning_rate": 5.14090939360615e-06, "loss": 0.4402, "step": 3860 }, { "epoch": 1.6261406710655621, "grad_norm": 0.49871334433555603, "learning_rate": 5.138459427601831e-06, "loss": 0.463, "step": 3861 }, { "epoch": 1.6265618419205392, "grad_norm": 0.3733348846435547, "learning_rate": 5.136009428328359e-06, "loss": 0.3649, "step": 3862 }, { "epoch": 1.6269830127755158, "grad_norm": 0.4274713695049286, "learning_rate": 5.133559396374423e-06, "loss": 0.4955, "step": 3863 }, { "epoch": 1.6274041836304929, "grad_norm": 0.41455990076065063, "learning_rate": 5.131109332328721e-06, "loss": 0.4292, "step": 3864 }, { "epoch": 1.6278253544854695, "grad_norm": 0.4292064607143402, "learning_rate": 5.128659236779955e-06, "loss": 0.4654, "step": 3865 }, { "epoch": 1.6282465253404466, "grad_norm": 0.39235833287239075, "learning_rate": 5.126209110316835e-06, "loss": 0.3957, "step": 3866 }, { "epoch": 1.6286676961954232, "grad_norm": 0.44925379753112793, "learning_rate": 5.12375895352808e-06, "loss": 0.4494, "step": 3867 }, { "epoch": 1.6290888670504002, "grad_norm": 0.4677084684371948, "learning_rate": 5.1213087670024155e-06, "loss": 0.4563, "step": 3868 }, { "epoch": 1.6295100379053769, "grad_norm": 0.37864387035369873, "learning_rate": 5.118858551328576e-06, "loss": 0.4269, "step": 3869 }, { "epoch": 1.629931208760354, "grad_norm": 0.3515053987503052, "learning_rate": 5.116408307095301e-06, "loss": 0.3499, "step": 3870 }, { "epoch": 1.6303523796153305, "grad_norm": 0.5064327120780945, "learning_rate": 5.113958034891335e-06, "loss": 0.5148, "step": 3871 }, { "epoch": 1.6307735504703076, "grad_norm": 0.4082535207271576, "learning_rate": 5.111507735305436e-06, "loss": 0.365, "step": 3872 }, { "epoch": 1.6311947213252842, "grad_norm": 0.4019666910171509, "learning_rate": 5.109057408926359e-06, "loss": 0.4596, "step": 3873 }, { "epoch": 1.6316158921802613, "grad_norm": 0.41575026512145996, "learning_rate": 5.1066070563428736e-06, "loss": 0.4596, "step": 3874 }, { "epoch": 1.6320370630352379, "grad_norm": 0.4670383036136627, "learning_rate": 5.1041566781437525e-06, "loss": 0.4837, "step": 3875 }, { "epoch": 1.632458233890215, "grad_norm": 0.4504801034927368, "learning_rate": 5.101706274917775e-06, "loss": 0.4404, "step": 3876 }, { "epoch": 1.6328794047451916, "grad_norm": 0.4516677260398865, "learning_rate": 5.099255847253725e-06, "loss": 0.4152, "step": 3877 }, { "epoch": 1.6333005756001686, "grad_norm": 0.47979775071144104, "learning_rate": 5.096805395740394e-06, "loss": 0.4622, "step": 3878 }, { "epoch": 1.6337217464551452, "grad_norm": 0.419607937335968, "learning_rate": 5.094354920966576e-06, "loss": 0.3763, "step": 3879 }, { "epoch": 1.6341429173101223, "grad_norm": 0.4466531276702881, "learning_rate": 5.091904423521079e-06, "loss": 0.4682, "step": 3880 }, { "epoch": 1.634564088165099, "grad_norm": 0.47685372829437256, "learning_rate": 5.089453903992706e-06, "loss": 0.4476, "step": 3881 }, { "epoch": 1.634985259020076, "grad_norm": 0.421130508184433, "learning_rate": 5.087003362970271e-06, "loss": 0.4147, "step": 3882 }, { "epoch": 1.6354064298750526, "grad_norm": 0.4248106777667999, "learning_rate": 5.084552801042591e-06, "loss": 0.4289, "step": 3883 }, { "epoch": 1.6358276007300296, "grad_norm": 0.42932870984077454, "learning_rate": 5.082102218798492e-06, "loss": 0.418, "step": 3884 }, { "epoch": 1.6362487715850063, "grad_norm": 0.4597097933292389, "learning_rate": 5.079651616826802e-06, "loss": 0.4605, "step": 3885 }, { "epoch": 1.6366699424399833, "grad_norm": 0.4493977129459381, "learning_rate": 5.077200995716351e-06, "loss": 0.449, "step": 3886 }, { "epoch": 1.63709111329496, "grad_norm": 0.4276854395866394, "learning_rate": 5.074750356055976e-06, "loss": 0.4047, "step": 3887 }, { "epoch": 1.6375122841499368, "grad_norm": 0.4161663055419922, "learning_rate": 5.072299698434523e-06, "loss": 0.4902, "step": 3888 }, { "epoch": 1.6379334550049136, "grad_norm": 0.4401995837688446, "learning_rate": 5.069849023440834e-06, "loss": 0.4446, "step": 3889 }, { "epoch": 1.6383546258598904, "grad_norm": 0.3952595293521881, "learning_rate": 5.067398331663761e-06, "loss": 0.4096, "step": 3890 }, { "epoch": 1.6387757967148673, "grad_norm": 0.4474954903125763, "learning_rate": 5.064947623692159e-06, "loss": 0.4569, "step": 3891 }, { "epoch": 1.6391969675698441, "grad_norm": 0.3873271942138672, "learning_rate": 5.062496900114887e-06, "loss": 0.4345, "step": 3892 }, { "epoch": 1.639618138424821, "grad_norm": 0.4405921995639801, "learning_rate": 5.060046161520803e-06, "loss": 0.4646, "step": 3893 }, { "epoch": 1.6400393092797978, "grad_norm": 0.4235416352748871, "learning_rate": 5.057595408498777e-06, "loss": 0.426, "step": 3894 }, { "epoch": 1.6404604801347746, "grad_norm": 0.38939371705055237, "learning_rate": 5.055144641637675e-06, "loss": 0.398, "step": 3895 }, { "epoch": 1.6408816509897515, "grad_norm": 0.41208261251449585, "learning_rate": 5.052693861526371e-06, "loss": 0.4407, "step": 3896 }, { "epoch": 1.6413028218447283, "grad_norm": 0.3976222574710846, "learning_rate": 5.050243068753741e-06, "loss": 0.4552, "step": 3897 }, { "epoch": 1.6417239926997051, "grad_norm": 0.43786343932151794, "learning_rate": 5.0477922639086594e-06, "loss": 0.4454, "step": 3898 }, { "epoch": 1.642145163554682, "grad_norm": 0.4908190071582794, "learning_rate": 5.045341447580016e-06, "loss": 0.4676, "step": 3899 }, { "epoch": 1.6425663344096588, "grad_norm": 0.39060738682746887, "learning_rate": 5.042890620356687e-06, "loss": 0.4196, "step": 3900 }, { "epoch": 1.6429875052646357, "grad_norm": 0.46024319529533386, "learning_rate": 5.040439782827563e-06, "loss": 0.4839, "step": 3901 }, { "epoch": 1.6434086761196125, "grad_norm": 0.45075640082359314, "learning_rate": 5.037988935581532e-06, "loss": 0.4587, "step": 3902 }, { "epoch": 1.6438298469745893, "grad_norm": 0.4420000910758972, "learning_rate": 5.035538079207488e-06, "loss": 0.4345, "step": 3903 }, { "epoch": 1.6442510178295662, "grad_norm": 0.4733249247074127, "learning_rate": 5.033087214294324e-06, "loss": 0.4535, "step": 3904 }, { "epoch": 1.644672188684543, "grad_norm": 0.4086625277996063, "learning_rate": 5.030636341430936e-06, "loss": 0.3889, "step": 3905 }, { "epoch": 1.6450933595395199, "grad_norm": 0.4456026554107666, "learning_rate": 5.028185461206218e-06, "loss": 0.484, "step": 3906 }, { "epoch": 1.6455145303944967, "grad_norm": 0.40992915630340576, "learning_rate": 5.025734574209076e-06, "loss": 0.4181, "step": 3907 }, { "epoch": 1.6459357012494735, "grad_norm": 0.42397862672805786, "learning_rate": 5.0232836810284055e-06, "loss": 0.3974, "step": 3908 }, { "epoch": 1.6463568721044504, "grad_norm": 0.4559449553489685, "learning_rate": 5.020832782253115e-06, "loss": 0.4761, "step": 3909 }, { "epoch": 1.6467780429594272, "grad_norm": 0.42997080087661743, "learning_rate": 5.018381878472102e-06, "loss": 0.4746, "step": 3910 }, { "epoch": 1.647199213814404, "grad_norm": 0.45033615827560425, "learning_rate": 5.015930970274278e-06, "loss": 0.466, "step": 3911 }, { "epoch": 1.6476203846693809, "grad_norm": 0.4368593096733093, "learning_rate": 5.0134800582485455e-06, "loss": 0.454, "step": 3912 }, { "epoch": 1.6480415555243577, "grad_norm": 0.42166733741760254, "learning_rate": 5.0110291429838135e-06, "loss": 0.4185, "step": 3913 }, { "epoch": 1.6484627263793346, "grad_norm": 0.42837268114089966, "learning_rate": 5.008578225068988e-06, "loss": 0.4656, "step": 3914 }, { "epoch": 1.6488838972343114, "grad_norm": 0.42237961292266846, "learning_rate": 5.00612730509298e-06, "loss": 0.4168, "step": 3915 }, { "epoch": 1.6493050680892882, "grad_norm": 0.4881746768951416, "learning_rate": 5.0036763836446976e-06, "loss": 0.4676, "step": 3916 }, { "epoch": 1.649726238944265, "grad_norm": 0.4453079104423523, "learning_rate": 5.001225461313052e-06, "loss": 0.4341, "step": 3917 }, { "epoch": 1.650147409799242, "grad_norm": 0.43830299377441406, "learning_rate": 4.998774538686949e-06, "loss": 0.4574, "step": 3918 }, { "epoch": 1.6505685806542187, "grad_norm": 0.4083831012248993, "learning_rate": 4.996323616355302e-06, "loss": 0.4219, "step": 3919 }, { "epoch": 1.6509897515091956, "grad_norm": 0.4007321894168854, "learning_rate": 4.993872694907022e-06, "loss": 0.4542, "step": 3920 }, { "epoch": 1.6514109223641724, "grad_norm": 0.4427451193332672, "learning_rate": 4.991421774931013e-06, "loss": 0.4459, "step": 3921 }, { "epoch": 1.6518320932191493, "grad_norm": 0.4569895267486572, "learning_rate": 4.988970857016188e-06, "loss": 0.4236, "step": 3922 }, { "epoch": 1.652253264074126, "grad_norm": 0.42523857951164246, "learning_rate": 4.986519941751456e-06, "loss": 0.4317, "step": 3923 }, { "epoch": 1.652674434929103, "grad_norm": 0.4488692879676819, "learning_rate": 4.984069029725723e-06, "loss": 0.4645, "step": 3924 }, { "epoch": 1.6530956057840798, "grad_norm": 0.37913644313812256, "learning_rate": 4.981618121527899e-06, "loss": 0.4265, "step": 3925 }, { "epoch": 1.6535167766390566, "grad_norm": 0.4043061435222626, "learning_rate": 4.979167217746888e-06, "loss": 0.3989, "step": 3926 }, { "epoch": 1.6539379474940334, "grad_norm": 0.47139036655426025, "learning_rate": 4.976716318971595e-06, "loss": 0.5019, "step": 3927 }, { "epoch": 1.6543591183490103, "grad_norm": 0.4032707214355469, "learning_rate": 4.9742654257909266e-06, "loss": 0.4303, "step": 3928 }, { "epoch": 1.6547802892039871, "grad_norm": 0.3984912931919098, "learning_rate": 4.971814538793783e-06, "loss": 0.4306, "step": 3929 }, { "epoch": 1.655201460058964, "grad_norm": 0.44017109274864197, "learning_rate": 4.9693636585690675e-06, "loss": 0.4497, "step": 3930 }, { "epoch": 1.6556226309139408, "grad_norm": 0.42121660709381104, "learning_rate": 4.9669127857056775e-06, "loss": 0.4622, "step": 3931 }, { "epoch": 1.6560438017689176, "grad_norm": 0.40630313754081726, "learning_rate": 4.964461920792512e-06, "loss": 0.4434, "step": 3932 }, { "epoch": 1.6564649726238945, "grad_norm": 0.40504190325737, "learning_rate": 4.962011064418469e-06, "loss": 0.4021, "step": 3933 }, { "epoch": 1.6568861434788713, "grad_norm": 0.47945380210876465, "learning_rate": 4.959560217172437e-06, "loss": 0.4832, "step": 3934 }, { "epoch": 1.6573073143338481, "grad_norm": 0.41835203766822815, "learning_rate": 4.957109379643315e-06, "loss": 0.4437, "step": 3935 }, { "epoch": 1.657728485188825, "grad_norm": 0.4720364809036255, "learning_rate": 4.954658552419987e-06, "loss": 0.4766, "step": 3936 }, { "epoch": 1.6581496560438018, "grad_norm": 0.4394035041332245, "learning_rate": 4.952207736091341e-06, "loss": 0.4358, "step": 3937 }, { "epoch": 1.6585708268987784, "grad_norm": 0.4435567259788513, "learning_rate": 4.949756931246261e-06, "loss": 0.4877, "step": 3938 }, { "epoch": 1.6589919977537555, "grad_norm": 0.3961528241634369, "learning_rate": 4.9473061384736296e-06, "loss": 0.4129, "step": 3939 }, { "epoch": 1.6594131686087321, "grad_norm": 0.43792086839675903, "learning_rate": 4.944855358362328e-06, "loss": 0.4777, "step": 3940 }, { "epoch": 1.6598343394637092, "grad_norm": 0.40669289231300354, "learning_rate": 4.942404591501225e-06, "loss": 0.4389, "step": 3941 }, { "epoch": 1.6602555103186858, "grad_norm": 0.39309215545654297, "learning_rate": 4.939953838479198e-06, "loss": 0.4419, "step": 3942 }, { "epoch": 1.6606766811736628, "grad_norm": 0.4182872176170349, "learning_rate": 4.937503099885115e-06, "loss": 0.336, "step": 3943 }, { "epoch": 1.6610978520286395, "grad_norm": 0.48299098014831543, "learning_rate": 4.935052376307841e-06, "loss": 0.5105, "step": 3944 }, { "epoch": 1.6615190228836165, "grad_norm": 0.44241616129875183, "learning_rate": 4.93260166833624e-06, "loss": 0.4935, "step": 3945 }, { "epoch": 1.6619401937385931, "grad_norm": 0.43647560477256775, "learning_rate": 4.930150976559168e-06, "loss": 0.4425, "step": 3946 }, { "epoch": 1.6623613645935702, "grad_norm": 0.42762428522109985, "learning_rate": 4.927700301565479e-06, "loss": 0.453, "step": 3947 }, { "epoch": 1.6627825354485468, "grad_norm": 0.40185457468032837, "learning_rate": 4.925249643944025e-06, "loss": 0.4112, "step": 3948 }, { "epoch": 1.6632037063035239, "grad_norm": 0.41933247447013855, "learning_rate": 4.922799004283651e-06, "loss": 0.4322, "step": 3949 }, { "epoch": 1.6636248771585005, "grad_norm": 0.41041335463523865, "learning_rate": 4.9203483831732004e-06, "loss": 0.4191, "step": 3950 }, { "epoch": 1.6640460480134776, "grad_norm": 0.4073835611343384, "learning_rate": 4.9178977812015086e-06, "loss": 0.4142, "step": 3951 }, { "epoch": 1.6644672188684542, "grad_norm": 0.4580373167991638, "learning_rate": 4.91544719895741e-06, "loss": 0.4832, "step": 3952 }, { "epoch": 1.6648883897234312, "grad_norm": 0.39494839310646057, "learning_rate": 4.9129966370297315e-06, "loss": 0.4497, "step": 3953 }, { "epoch": 1.6653095605784078, "grad_norm": 0.4619349539279938, "learning_rate": 4.910546096007295e-06, "loss": 0.4966, "step": 3954 }, { "epoch": 1.665730731433385, "grad_norm": 0.4276808798313141, "learning_rate": 4.908095576478924e-06, "loss": 0.3764, "step": 3955 }, { "epoch": 1.6661519022883615, "grad_norm": 0.43444332480430603, "learning_rate": 4.9056450790334254e-06, "loss": 0.4997, "step": 3956 }, { "epoch": 1.6665730731433386, "grad_norm": 0.4442448914051056, "learning_rate": 4.903194604259608e-06, "loss": 0.4635, "step": 3957 }, { "epoch": 1.6669942439983152, "grad_norm": 0.4009758234024048, "learning_rate": 4.900744152746276e-06, "loss": 0.4091, "step": 3958 }, { "epoch": 1.6674154148532923, "grad_norm": 0.4364372491836548, "learning_rate": 4.898293725082226e-06, "loss": 0.4391, "step": 3959 }, { "epoch": 1.6678365857082689, "grad_norm": 0.46495065093040466, "learning_rate": 4.895843321856249e-06, "loss": 0.4348, "step": 3960 }, { "epoch": 1.668257756563246, "grad_norm": 0.3937761187553406, "learning_rate": 4.893392943657127e-06, "loss": 0.4238, "step": 3961 }, { "epoch": 1.6686789274182225, "grad_norm": 0.4331815540790558, "learning_rate": 4.890942591073643e-06, "loss": 0.4492, "step": 3962 }, { "epoch": 1.6691000982731996, "grad_norm": 0.4634914696216583, "learning_rate": 4.888492264694566e-06, "loss": 0.4873, "step": 3963 }, { "epoch": 1.6695212691281762, "grad_norm": 0.43838781118392944, "learning_rate": 4.886041965108666e-06, "loss": 0.4701, "step": 3964 }, { "epoch": 1.6699424399831533, "grad_norm": 0.3795328438282013, "learning_rate": 4.8835916929047016e-06, "loss": 0.3862, "step": 3965 }, { "epoch": 1.67036361083813, "grad_norm": 0.4301947057247162, "learning_rate": 4.881141448671426e-06, "loss": 0.4338, "step": 3966 }, { "epoch": 1.670784781693107, "grad_norm": 0.45671942830085754, "learning_rate": 4.878691232997586e-06, "loss": 0.4772, "step": 3967 }, { "epoch": 1.6712059525480836, "grad_norm": 0.44765278697013855, "learning_rate": 4.876241046471922e-06, "loss": 0.4627, "step": 3968 }, { "epoch": 1.6716271234030606, "grad_norm": 0.40793564915657043, "learning_rate": 4.873790889683167e-06, "loss": 0.4658, "step": 3969 }, { "epoch": 1.6720482942580372, "grad_norm": 0.43220630288124084, "learning_rate": 4.871340763220047e-06, "loss": 0.4567, "step": 3970 }, { "epoch": 1.6724694651130143, "grad_norm": 0.41756272315979004, "learning_rate": 4.868890667671281e-06, "loss": 0.4226, "step": 3971 }, { "epoch": 1.672890635967991, "grad_norm": 0.4502735137939453, "learning_rate": 4.8664406036255776e-06, "loss": 0.4331, "step": 3972 }, { "epoch": 1.673311806822968, "grad_norm": 0.43419191241264343, "learning_rate": 4.863990571671642e-06, "loss": 0.4625, "step": 3973 }, { "epoch": 1.6737329776779446, "grad_norm": 0.4126448333263397, "learning_rate": 4.861540572398169e-06, "loss": 0.4244, "step": 3974 }, { "epoch": 1.6741541485329217, "grad_norm": 0.46959546208381653, "learning_rate": 4.859090606393852e-06, "loss": 0.4748, "step": 3975 }, { "epoch": 1.6745753193878983, "grad_norm": 0.4260556399822235, "learning_rate": 4.8566406742473634e-06, "loss": 0.382, "step": 3976 }, { "epoch": 1.6749964902428753, "grad_norm": 0.4595566391944885, "learning_rate": 4.854190776547377e-06, "loss": 0.48, "step": 3977 }, { "epoch": 1.675417661097852, "grad_norm": 0.4441293478012085, "learning_rate": 4.8517409138825585e-06, "loss": 0.4109, "step": 3978 }, { "epoch": 1.675838831952829, "grad_norm": 0.45606887340545654, "learning_rate": 4.849291086841562e-06, "loss": 0.4297, "step": 3979 }, { "epoch": 1.6762600028078056, "grad_norm": 0.4671350419521332, "learning_rate": 4.8468412960130365e-06, "loss": 0.4343, "step": 3980 }, { "epoch": 1.6766811736627827, "grad_norm": 0.4296891391277313, "learning_rate": 4.8443915419856154e-06, "loss": 0.4354, "step": 3981 }, { "epoch": 1.6771023445177593, "grad_norm": 0.4543410837650299, "learning_rate": 4.8419418253479295e-06, "loss": 0.5043, "step": 3982 }, { "epoch": 1.6775235153727364, "grad_norm": 0.4402567744255066, "learning_rate": 4.839492146688599e-06, "loss": 0.3981, "step": 3983 }, { "epoch": 1.677944686227713, "grad_norm": 0.4577004313468933, "learning_rate": 4.837042506596235e-06, "loss": 0.4649, "step": 3984 }, { "epoch": 1.67836585708269, "grad_norm": 0.45153045654296875, "learning_rate": 4.834592905659441e-06, "loss": 0.4237, "step": 3985 }, { "epoch": 1.6787870279376667, "grad_norm": 0.4123789668083191, "learning_rate": 4.832143344466805e-06, "loss": 0.4356, "step": 3986 }, { "epoch": 1.6792081987926437, "grad_norm": 0.44165247678756714, "learning_rate": 4.829693823606913e-06, "loss": 0.419, "step": 3987 }, { "epoch": 1.6796293696476203, "grad_norm": 0.4284719228744507, "learning_rate": 4.827244343668337e-06, "loss": 0.4313, "step": 3988 }, { "epoch": 1.6800505405025972, "grad_norm": 0.4113755524158478, "learning_rate": 4.82479490523964e-06, "loss": 0.4204, "step": 3989 }, { "epoch": 1.680471711357574, "grad_norm": 0.44305121898651123, "learning_rate": 4.822345508909376e-06, "loss": 0.4483, "step": 3990 }, { "epoch": 1.6808928822125508, "grad_norm": 0.5089574456214905, "learning_rate": 4.819896155266087e-06, "loss": 0.4855, "step": 3991 }, { "epoch": 1.6813140530675277, "grad_norm": 0.4277949929237366, "learning_rate": 4.8174468448983055e-06, "loss": 0.4833, "step": 3992 }, { "epoch": 1.6817352239225045, "grad_norm": 0.37129122018814087, "learning_rate": 4.814997578394555e-06, "loss": 0.3513, "step": 3993 }, { "epoch": 1.6821563947774814, "grad_norm": 0.4019164741039276, "learning_rate": 4.812548356343347e-06, "loss": 0.442, "step": 3994 }, { "epoch": 1.6825775656324582, "grad_norm": 0.43896475434303284, "learning_rate": 4.810099179333185e-06, "loss": 0.4937, "step": 3995 }, { "epoch": 1.682998736487435, "grad_norm": 0.4292345345020294, "learning_rate": 4.807650047952555e-06, "loss": 0.4408, "step": 3996 }, { "epoch": 1.6834199073424119, "grad_norm": 0.3983165919780731, "learning_rate": 4.805200962789938e-06, "loss": 0.4134, "step": 3997 }, { "epoch": 1.6838410781973887, "grad_norm": 0.4237021803855896, "learning_rate": 4.802751924433804e-06, "loss": 0.505, "step": 3998 }, { "epoch": 1.6842622490523655, "grad_norm": 0.4333159029483795, "learning_rate": 4.800302933472608e-06, "loss": 0.4661, "step": 3999 }, { "epoch": 1.6846834199073424, "grad_norm": 0.40169796347618103, "learning_rate": 4.797853990494798e-06, "loss": 0.3766, "step": 4000 }, { "epoch": 1.6851045907623192, "grad_norm": 0.49455106258392334, "learning_rate": 4.795405096088807e-06, "loss": 0.4842, "step": 4001 }, { "epoch": 1.685525761617296, "grad_norm": 0.37881654500961304, "learning_rate": 4.792956250843056e-06, "loss": 0.3904, "step": 4002 }, { "epoch": 1.685946932472273, "grad_norm": 0.44875890016555786, "learning_rate": 4.790507455345958e-06, "loss": 0.4498, "step": 4003 }, { "epoch": 1.6863681033272497, "grad_norm": 0.3782164454460144, "learning_rate": 4.78805871018591e-06, "loss": 0.3702, "step": 4004 }, { "epoch": 1.6867892741822266, "grad_norm": 0.5128300189971924, "learning_rate": 4.785610015951299e-06, "loss": 0.4709, "step": 4005 }, { "epoch": 1.6872104450372034, "grad_norm": 0.4654075503349304, "learning_rate": 4.7831613732305e-06, "loss": 0.4581, "step": 4006 }, { "epoch": 1.6876316158921802, "grad_norm": 0.4177108108997345, "learning_rate": 4.780712782611875e-06, "loss": 0.4491, "step": 4007 }, { "epoch": 1.688052786747157, "grad_norm": 0.4684232175350189, "learning_rate": 4.778264244683772e-06, "loss": 0.4514, "step": 4008 }, { "epoch": 1.688473957602134, "grad_norm": 0.3888886868953705, "learning_rate": 4.77581576003453e-06, "loss": 0.3808, "step": 4009 }, { "epoch": 1.6888951284571108, "grad_norm": 0.41843101382255554, "learning_rate": 4.773367329252472e-06, "loss": 0.4795, "step": 4010 }, { "epoch": 1.6893162993120876, "grad_norm": 0.46666979789733887, "learning_rate": 4.770918952925908e-06, "loss": 0.5013, "step": 4011 }, { "epoch": 1.6897374701670644, "grad_norm": 0.435630202293396, "learning_rate": 4.768470631643139e-06, "loss": 0.4707, "step": 4012 }, { "epoch": 1.6901586410220413, "grad_norm": 0.41861391067504883, "learning_rate": 4.766022365992444e-06, "loss": 0.3914, "step": 4013 }, { "epoch": 1.6905798118770181, "grad_norm": 0.43658074736595154, "learning_rate": 4.763574156562101e-06, "loss": 0.4205, "step": 4014 }, { "epoch": 1.691000982731995, "grad_norm": 0.4109232425689697, "learning_rate": 4.761126003940366e-06, "loss": 0.462, "step": 4015 }, { "epoch": 1.6914221535869718, "grad_norm": 0.4750286936759949, "learning_rate": 4.75867790871548e-06, "loss": 0.5067, "step": 4016 }, { "epoch": 1.6918433244419486, "grad_norm": 0.400732159614563, "learning_rate": 4.756229871475676e-06, "loss": 0.4001, "step": 4017 }, { "epoch": 1.6922644952969255, "grad_norm": 0.48005247116088867, "learning_rate": 4.7537818928091715e-06, "loss": 0.4992, "step": 4018 }, { "epoch": 1.6926856661519023, "grad_norm": 0.46652305126190186, "learning_rate": 4.751333973304166e-06, "loss": 0.4582, "step": 4019 }, { "epoch": 1.6931068370068791, "grad_norm": 0.46491459012031555, "learning_rate": 4.748886113548851e-06, "loss": 0.4938, "step": 4020 }, { "epoch": 1.693528007861856, "grad_norm": 0.4271242320537567, "learning_rate": 4.746438314131399e-06, "loss": 0.4253, "step": 4021 }, { "epoch": 1.6939491787168328, "grad_norm": 0.4839342534542084, "learning_rate": 4.743990575639968e-06, "loss": 0.483, "step": 4022 }, { "epoch": 1.6943703495718097, "grad_norm": 0.43553426861763, "learning_rate": 4.741542898662704e-06, "loss": 0.4589, "step": 4023 }, { "epoch": 1.6947915204267865, "grad_norm": 0.42975932359695435, "learning_rate": 4.739095283787738e-06, "loss": 0.4596, "step": 4024 }, { "epoch": 1.6952126912817633, "grad_norm": 0.4165743589401245, "learning_rate": 4.736647731603183e-06, "loss": 0.4153, "step": 4025 }, { "epoch": 1.6956338621367402, "grad_norm": 0.4310729205608368, "learning_rate": 4.7342002426971405e-06, "loss": 0.4498, "step": 4026 }, { "epoch": 1.696055032991717, "grad_norm": 0.431247353553772, "learning_rate": 4.7317528176576936e-06, "loss": 0.4371, "step": 4027 }, { "epoch": 1.6964762038466938, "grad_norm": 0.41163870692253113, "learning_rate": 4.729305457072913e-06, "loss": 0.4553, "step": 4028 }, { "epoch": 1.6968973747016707, "grad_norm": 0.4190461337566376, "learning_rate": 4.726858161530853e-06, "loss": 0.4435, "step": 4029 }, { "epoch": 1.6973185455566475, "grad_norm": 0.36770564317703247, "learning_rate": 4.72441093161955e-06, "loss": 0.3565, "step": 4030 }, { "epoch": 1.6977397164116244, "grad_norm": 0.44813084602355957, "learning_rate": 4.7219637679270295e-06, "loss": 0.5112, "step": 4031 }, { "epoch": 1.6981608872666012, "grad_norm": 0.4531267285346985, "learning_rate": 4.719516671041295e-06, "loss": 0.4743, "step": 4032 }, { "epoch": 1.698582058121578, "grad_norm": 0.4199783504009247, "learning_rate": 4.717069641550337e-06, "loss": 0.385, "step": 4033 }, { "epoch": 1.6990032289765549, "grad_norm": 0.42886874079704285, "learning_rate": 4.7146226800421355e-06, "loss": 0.5005, "step": 4034 }, { "epoch": 1.6994243998315317, "grad_norm": 0.4496636688709259, "learning_rate": 4.712175787104642e-06, "loss": 0.4522, "step": 4035 }, { "epoch": 1.6998455706865085, "grad_norm": 0.41790059208869934, "learning_rate": 4.7097289633257995e-06, "loss": 0.4174, "step": 4036 }, { "epoch": 1.7002667415414854, "grad_norm": 0.40940824151039124, "learning_rate": 4.707282209293534e-06, "loss": 0.3906, "step": 4037 }, { "epoch": 1.7006879123964622, "grad_norm": 0.3811332583427429, "learning_rate": 4.704835525595755e-06, "loss": 0.445, "step": 4038 }, { "epoch": 1.7011090832514388, "grad_norm": 0.4756951332092285, "learning_rate": 4.702388912820353e-06, "loss": 0.4702, "step": 4039 }, { "epoch": 1.701530254106416, "grad_norm": 0.41240715980529785, "learning_rate": 4.6999423715552e-06, "loss": 0.4209, "step": 4040 }, { "epoch": 1.7019514249613925, "grad_norm": 0.46357086300849915, "learning_rate": 4.697495902388155e-06, "loss": 0.4737, "step": 4041 }, { "epoch": 1.7023725958163696, "grad_norm": 0.4165528416633606, "learning_rate": 4.6950495059070565e-06, "loss": 0.4201, "step": 4042 }, { "epoch": 1.7027937666713462, "grad_norm": 0.378492534160614, "learning_rate": 4.6926031826997285e-06, "loss": 0.394, "step": 4043 }, { "epoch": 1.7032149375263232, "grad_norm": 0.41847240924835205, "learning_rate": 4.690156933353976e-06, "loss": 0.4141, "step": 4044 }, { "epoch": 1.7036361083812999, "grad_norm": 0.4404904544353485, "learning_rate": 4.687710758457583e-06, "loss": 0.4457, "step": 4045 }, { "epoch": 1.704057279236277, "grad_norm": 0.43401801586151123, "learning_rate": 4.6852646585983205e-06, "loss": 0.4534, "step": 4046 }, { "epoch": 1.7044784500912535, "grad_norm": 0.39472758769989014, "learning_rate": 4.6828186343639395e-06, "loss": 0.4188, "step": 4047 }, { "epoch": 1.7048996209462306, "grad_norm": 0.4329071044921875, "learning_rate": 4.680372686342173e-06, "loss": 0.4505, "step": 4048 }, { "epoch": 1.7053207918012072, "grad_norm": 0.41284823417663574, "learning_rate": 4.677926815120735e-06, "loss": 0.3986, "step": 4049 }, { "epoch": 1.7057419626561843, "grad_norm": 0.5051524639129639, "learning_rate": 4.675481021287321e-06, "loss": 0.5032, "step": 4050 }, { "epoch": 1.706163133511161, "grad_norm": 0.4764155149459839, "learning_rate": 4.6730353054296105e-06, "loss": 0.4624, "step": 4051 }, { "epoch": 1.706584304366138, "grad_norm": 0.3973332643508911, "learning_rate": 4.670589668135258e-06, "loss": 0.4209, "step": 4052 }, { "epoch": 1.7070054752211146, "grad_norm": 0.40676039457321167, "learning_rate": 4.668144109991909e-06, "loss": 0.4342, "step": 4053 }, { "epoch": 1.7074266460760916, "grad_norm": 0.43003982305526733, "learning_rate": 4.665698631587183e-06, "loss": 0.4118, "step": 4054 }, { "epoch": 1.7078478169310682, "grad_norm": 0.4478260278701782, "learning_rate": 4.663253233508676e-06, "loss": 0.4894, "step": 4055 }, { "epoch": 1.7082689877860453, "grad_norm": 0.3733246922492981, "learning_rate": 4.6608079163439765e-06, "loss": 0.36, "step": 4056 }, { "epoch": 1.708690158641022, "grad_norm": 0.4456981420516968, "learning_rate": 4.658362680680645e-06, "loss": 0.5055, "step": 4057 }, { "epoch": 1.709111329495999, "grad_norm": 0.41391873359680176, "learning_rate": 4.655917527106226e-06, "loss": 0.4569, "step": 4058 }, { "epoch": 1.7095325003509756, "grad_norm": 0.3933614492416382, "learning_rate": 4.6534724562082435e-06, "loss": 0.4145, "step": 4059 }, { "epoch": 1.7099536712059527, "grad_norm": 0.4477573037147522, "learning_rate": 4.651027468574199e-06, "loss": 0.4218, "step": 4060 }, { "epoch": 1.7103748420609293, "grad_norm": 0.46968209743499756, "learning_rate": 4.648582564791577e-06, "loss": 0.4736, "step": 4061 }, { "epoch": 1.7107960129159063, "grad_norm": 0.36368831992149353, "learning_rate": 4.646137745447843e-06, "loss": 0.3988, "step": 4062 }, { "epoch": 1.711217183770883, "grad_norm": 0.4144364297389984, "learning_rate": 4.643693011130438e-06, "loss": 0.4076, "step": 4063 }, { "epoch": 1.71163835462586, "grad_norm": 0.4422188401222229, "learning_rate": 4.641248362426787e-06, "loss": 0.4413, "step": 4064 }, { "epoch": 1.7120595254808366, "grad_norm": 0.4570010006427765, "learning_rate": 4.63880379992429e-06, "loss": 0.4516, "step": 4065 }, { "epoch": 1.7124806963358137, "grad_norm": 0.411990761756897, "learning_rate": 4.63635932421033e-06, "loss": 0.4328, "step": 4066 }, { "epoch": 1.7129018671907903, "grad_norm": 0.427849143743515, "learning_rate": 4.633914935872268e-06, "loss": 0.4086, "step": 4067 }, { "epoch": 1.7133230380457674, "grad_norm": 0.43518704175949097, "learning_rate": 4.631470635497443e-06, "loss": 0.4663, "step": 4068 }, { "epoch": 1.713744208900744, "grad_norm": 0.4965638518333435, "learning_rate": 4.629026423673174e-06, "loss": 0.4937, "step": 4069 }, { "epoch": 1.714165379755721, "grad_norm": 0.4536665976047516, "learning_rate": 4.6265823009867574e-06, "loss": 0.4608, "step": 4070 }, { "epoch": 1.7145865506106976, "grad_norm": 0.4056556224822998, "learning_rate": 4.624138268025471e-06, "loss": 0.4012, "step": 4071 }, { "epoch": 1.7150077214656747, "grad_norm": 0.4676327407360077, "learning_rate": 4.621694325376566e-06, "loss": 0.4276, "step": 4072 }, { "epoch": 1.7154288923206513, "grad_norm": 0.4560127556324005, "learning_rate": 4.619250473627279e-06, "loss": 0.445, "step": 4073 }, { "epoch": 1.7158500631756284, "grad_norm": 0.44818544387817383, "learning_rate": 4.61680671336482e-06, "loss": 0.453, "step": 4074 }, { "epoch": 1.716271234030605, "grad_norm": 0.39852553606033325, "learning_rate": 4.614363045176374e-06, "loss": 0.3835, "step": 4075 }, { "epoch": 1.716692404885582, "grad_norm": 0.4185192286968231, "learning_rate": 4.611919469649112e-06, "loss": 0.4262, "step": 4076 }, { "epoch": 1.7171135757405587, "grad_norm": 0.44184091687202454, "learning_rate": 4.609475987370177e-06, "loss": 0.4649, "step": 4077 }, { "epoch": 1.7175347465955357, "grad_norm": 0.3953917920589447, "learning_rate": 4.607032598926691e-06, "loss": 0.3592, "step": 4078 }, { "epoch": 1.7179559174505123, "grad_norm": 0.4594789743423462, "learning_rate": 4.6045893049057544e-06, "loss": 0.4931, "step": 4079 }, { "epoch": 1.7183770883054894, "grad_norm": 0.41137880086898804, "learning_rate": 4.6021461058944425e-06, "loss": 0.4277, "step": 4080 }, { "epoch": 1.718798259160466, "grad_norm": 0.4187133014202118, "learning_rate": 4.599703002479809e-06, "loss": 0.4341, "step": 4081 }, { "epoch": 1.719219430015443, "grad_norm": 0.43292880058288574, "learning_rate": 4.597259995248887e-06, "loss": 0.4655, "step": 4082 }, { "epoch": 1.7196406008704197, "grad_norm": 0.48210248351097107, "learning_rate": 4.594817084788683e-06, "loss": 0.4592, "step": 4083 }, { "epoch": 1.7200617717253968, "grad_norm": 0.45205047726631165, "learning_rate": 4.592374271686182e-06, "loss": 0.4606, "step": 4084 }, { "epoch": 1.7204829425803734, "grad_norm": 0.4326614439487457, "learning_rate": 4.589931556528345e-06, "loss": 0.4684, "step": 4085 }, { "epoch": 1.7209041134353504, "grad_norm": 0.4273603558540344, "learning_rate": 4.587488939902109e-06, "loss": 0.3971, "step": 4086 }, { "epoch": 1.721325284290327, "grad_norm": 0.404781699180603, "learning_rate": 4.58504642239439e-06, "loss": 0.4394, "step": 4087 }, { "epoch": 1.7217464551453041, "grad_norm": 0.4293043911457062, "learning_rate": 4.582604004592075e-06, "loss": 0.5052, "step": 4088 }, { "epoch": 1.7221676260002807, "grad_norm": 0.43433046340942383, "learning_rate": 4.580161687082036e-06, "loss": 0.4133, "step": 4089 }, { "epoch": 1.7225887968552576, "grad_norm": 0.4599132239818573, "learning_rate": 4.577719470451109e-06, "loss": 0.4653, "step": 4090 }, { "epoch": 1.7230099677102344, "grad_norm": 0.38122180104255676, "learning_rate": 4.575277355286116e-06, "loss": 0.3943, "step": 4091 }, { "epoch": 1.7234311385652112, "grad_norm": 0.4649851322174072, "learning_rate": 4.572835342173846e-06, "loss": 0.3997, "step": 4092 }, { "epoch": 1.723852309420188, "grad_norm": 0.48012250661849976, "learning_rate": 4.570393431701074e-06, "loss": 0.4755, "step": 4093 }, { "epoch": 1.724273480275165, "grad_norm": 0.406497985124588, "learning_rate": 4.5679516244545424e-06, "loss": 0.3803, "step": 4094 }, { "epoch": 1.7246946511301418, "grad_norm": 0.4263075888156891, "learning_rate": 4.565509921020967e-06, "loss": 0.4845, "step": 4095 }, { "epoch": 1.7251158219851186, "grad_norm": 0.3852250576019287, "learning_rate": 4.563068321987047e-06, "loss": 0.4231, "step": 4096 }, { "epoch": 1.7255369928400954, "grad_norm": 0.43312403559684753, "learning_rate": 4.5606268279394485e-06, "loss": 0.4115, "step": 4097 }, { "epoch": 1.7259581636950723, "grad_norm": 0.4762032628059387, "learning_rate": 4.558185439464819e-06, "loss": 0.4831, "step": 4098 }, { "epoch": 1.726379334550049, "grad_norm": 0.474588543176651, "learning_rate": 4.555744157149776e-06, "loss": 0.4539, "step": 4099 }, { "epoch": 1.726800505405026, "grad_norm": 0.4496694505214691, "learning_rate": 4.553302981580912e-06, "loss": 0.4399, "step": 4100 }, { "epoch": 1.7272216762600028, "grad_norm": 0.42683908343315125, "learning_rate": 4.550861913344796e-06, "loss": 0.3929, "step": 4101 }, { "epoch": 1.7276428471149796, "grad_norm": 0.4150325357913971, "learning_rate": 4.54842095302797e-06, "loss": 0.4264, "step": 4102 }, { "epoch": 1.7280640179699565, "grad_norm": 0.48133566975593567, "learning_rate": 4.545980101216949e-06, "loss": 0.4915, "step": 4103 }, { "epoch": 1.7284851888249333, "grad_norm": 0.36925041675567627, "learning_rate": 4.543539358498225e-06, "loss": 0.3783, "step": 4104 }, { "epoch": 1.7289063596799101, "grad_norm": 0.45594844222068787, "learning_rate": 4.541098725458259e-06, "loss": 0.489, "step": 4105 }, { "epoch": 1.729327530534887, "grad_norm": 0.4970391094684601, "learning_rate": 4.53865820268349e-06, "loss": 0.4558, "step": 4106 }, { "epoch": 1.7297487013898638, "grad_norm": 0.40026426315307617, "learning_rate": 4.53621779076033e-06, "loss": 0.4155, "step": 4107 }, { "epoch": 1.7301698722448406, "grad_norm": 0.39996808767318726, "learning_rate": 4.533777490275161e-06, "loss": 0.404, "step": 4108 }, { "epoch": 1.7305910430998175, "grad_norm": 0.44470417499542236, "learning_rate": 4.531337301814343e-06, "loss": 0.4421, "step": 4109 }, { "epoch": 1.7310122139547943, "grad_norm": 0.43047478795051575, "learning_rate": 4.528897225964205e-06, "loss": 0.456, "step": 4110 }, { "epoch": 1.7314333848097712, "grad_norm": 0.35096338391304016, "learning_rate": 4.526457263311049e-06, "loss": 0.3664, "step": 4111 }, { "epoch": 1.731854555664748, "grad_norm": 0.40706419944763184, "learning_rate": 4.524017414441154e-06, "loss": 0.4332, "step": 4112 }, { "epoch": 1.7322757265197248, "grad_norm": 0.4051132798194885, "learning_rate": 4.521577679940769e-06, "loss": 0.4004, "step": 4113 }, { "epoch": 1.7326968973747017, "grad_norm": 0.4801773428916931, "learning_rate": 4.519138060396117e-06, "loss": 0.519, "step": 4114 }, { "epoch": 1.7331180682296785, "grad_norm": 0.431465744972229, "learning_rate": 4.5166985563933855e-06, "loss": 0.3975, "step": 4115 }, { "epoch": 1.7335392390846553, "grad_norm": 0.4331028461456299, "learning_rate": 4.514259168518746e-06, "loss": 0.411, "step": 4116 }, { "epoch": 1.7339604099396322, "grad_norm": 0.3963873088359833, "learning_rate": 4.511819897358336e-06, "loss": 0.3967, "step": 4117 }, { "epoch": 1.734381580794609, "grad_norm": 0.4148111939430237, "learning_rate": 4.509380743498264e-06, "loss": 0.4371, "step": 4118 }, { "epoch": 1.7348027516495859, "grad_norm": 0.40591999888420105, "learning_rate": 4.5069417075246156e-06, "loss": 0.4167, "step": 4119 }, { "epoch": 1.7352239225045627, "grad_norm": 0.4393736720085144, "learning_rate": 4.50450279002344e-06, "loss": 0.4669, "step": 4120 }, { "epoch": 1.7356450933595395, "grad_norm": 0.45523887872695923, "learning_rate": 4.502063991580765e-06, "loss": 0.4388, "step": 4121 }, { "epoch": 1.7360662642145164, "grad_norm": 0.4202254116535187, "learning_rate": 4.499625312782587e-06, "loss": 0.4337, "step": 4122 }, { "epoch": 1.7364874350694932, "grad_norm": 0.47689008712768555, "learning_rate": 4.497186754214873e-06, "loss": 0.4572, "step": 4123 }, { "epoch": 1.73690860592447, "grad_norm": 0.4547792971134186, "learning_rate": 4.494748316463563e-06, "loss": 0.4627, "step": 4124 }, { "epoch": 1.7373297767794469, "grad_norm": 0.4430050253868103, "learning_rate": 4.492310000114567e-06, "loss": 0.4118, "step": 4125 }, { "epoch": 1.7377509476344237, "grad_norm": 0.43294599652290344, "learning_rate": 4.4898718057537646e-06, "loss": 0.4256, "step": 4126 }, { "epoch": 1.7381721184894006, "grad_norm": 0.41164231300354004, "learning_rate": 4.487433733967007e-06, "loss": 0.4114, "step": 4127 }, { "epoch": 1.7385932893443774, "grad_norm": 0.4706136882305145, "learning_rate": 4.484995785340119e-06, "loss": 0.4632, "step": 4128 }, { "epoch": 1.7390144601993542, "grad_norm": 0.4084579050540924, "learning_rate": 4.4825579604588895e-06, "loss": 0.4261, "step": 4129 }, { "epoch": 1.739435631054331, "grad_norm": 0.38694947957992554, "learning_rate": 4.480120259909084e-06, "loss": 0.4176, "step": 4130 }, { "epoch": 1.739856801909308, "grad_norm": 0.46807020902633667, "learning_rate": 4.477682684276432e-06, "loss": 0.4912, "step": 4131 }, { "epoch": 1.7402779727642848, "grad_norm": 0.39583253860473633, "learning_rate": 4.4752452341466395e-06, "loss": 0.4174, "step": 4132 }, { "epoch": 1.7406991436192616, "grad_norm": 0.39429715275764465, "learning_rate": 4.47280791010538e-06, "loss": 0.42, "step": 4133 }, { "epoch": 1.7411203144742384, "grad_norm": 0.41701874136924744, "learning_rate": 4.470370712738291e-06, "loss": 0.4414, "step": 4134 }, { "epoch": 1.7415414853292153, "grad_norm": 0.4059222936630249, "learning_rate": 4.467933642630989e-06, "loss": 0.4355, "step": 4135 }, { "epoch": 1.741962656184192, "grad_norm": 0.4505034387111664, "learning_rate": 4.465496700369052e-06, "loss": 0.454, "step": 4136 }, { "epoch": 1.742383827039169, "grad_norm": 0.40449395775794983, "learning_rate": 4.463059886538034e-06, "loss": 0.4357, "step": 4137 }, { "epoch": 1.7428049978941458, "grad_norm": 0.41187676787376404, "learning_rate": 4.460623201723453e-06, "loss": 0.4042, "step": 4138 }, { "epoch": 1.7432261687491226, "grad_norm": 0.47375380992889404, "learning_rate": 4.4581866465107964e-06, "loss": 0.4654, "step": 4139 }, { "epoch": 1.7436473396040992, "grad_norm": 0.5231866240501404, "learning_rate": 4.455750221485524e-06, "loss": 0.5165, "step": 4140 }, { "epoch": 1.7440685104590763, "grad_norm": 0.3706248998641968, "learning_rate": 4.453313927233061e-06, "loss": 0.3951, "step": 4141 }, { "epoch": 1.744489681314053, "grad_norm": 0.43673276901245117, "learning_rate": 4.4508777643388016e-06, "loss": 0.4738, "step": 4142 }, { "epoch": 1.74491085216903, "grad_norm": 0.36347293853759766, "learning_rate": 4.4484417333881115e-06, "loss": 0.3552, "step": 4143 }, { "epoch": 1.7453320230240066, "grad_norm": 0.4028284251689911, "learning_rate": 4.44600583496632e-06, "loss": 0.4176, "step": 4144 }, { "epoch": 1.7457531938789836, "grad_norm": 0.41642308235168457, "learning_rate": 4.443570069658727e-06, "loss": 0.4146, "step": 4145 }, { "epoch": 1.7461743647339603, "grad_norm": 0.42147043347358704, "learning_rate": 4.441134438050603e-06, "loss": 0.4689, "step": 4146 }, { "epoch": 1.7465955355889373, "grad_norm": 0.4110603630542755, "learning_rate": 4.438698940727179e-06, "loss": 0.4068, "step": 4147 }, { "epoch": 1.747016706443914, "grad_norm": 0.4151744544506073, "learning_rate": 4.4362635782736645e-06, "loss": 0.4472, "step": 4148 }, { "epoch": 1.747437877298891, "grad_norm": 0.39949265122413635, "learning_rate": 4.433828351275225e-06, "loss": 0.4, "step": 4149 }, { "epoch": 1.7478590481538676, "grad_norm": 0.4322587549686432, "learning_rate": 4.431393260317002e-06, "loss": 0.4639, "step": 4150 }, { "epoch": 1.7482802190088447, "grad_norm": 0.4106384515762329, "learning_rate": 4.428958305984099e-06, "loss": 0.4345, "step": 4151 }, { "epoch": 1.7487013898638213, "grad_norm": 0.44846880435943604, "learning_rate": 4.426523488861591e-06, "loss": 0.4728, "step": 4152 }, { "epoch": 1.7491225607187983, "grad_norm": 0.38915306329727173, "learning_rate": 4.424088809534519e-06, "loss": 0.4176, "step": 4153 }, { "epoch": 1.749543731573775, "grad_norm": 0.4084332585334778, "learning_rate": 4.421654268587887e-06, "loss": 0.4584, "step": 4154 }, { "epoch": 1.749964902428752, "grad_norm": 0.44193825125694275, "learning_rate": 4.4192198666066685e-06, "loss": 0.4632, "step": 4155 }, { "epoch": 1.7503860732837286, "grad_norm": 0.38685041666030884, "learning_rate": 4.4167856041758064e-06, "loss": 0.4246, "step": 4156 }, { "epoch": 1.7508072441387057, "grad_norm": 0.40715551376342773, "learning_rate": 4.414351481880205e-06, "loss": 0.4011, "step": 4157 }, { "epoch": 1.7512284149936823, "grad_norm": 0.3735491633415222, "learning_rate": 4.411917500304741e-06, "loss": 0.3991, "step": 4158 }, { "epoch": 1.7516495858486594, "grad_norm": 0.4977808892726898, "learning_rate": 4.409483660034249e-06, "loss": 0.4942, "step": 4159 }, { "epoch": 1.752070756703636, "grad_norm": 0.4238739609718323, "learning_rate": 4.407049961653536e-06, "loss": 0.4255, "step": 4160 }, { "epoch": 1.752491927558613, "grad_norm": 0.3987452983856201, "learning_rate": 4.404616405747373e-06, "loss": 0.3889, "step": 4161 }, { "epoch": 1.7529130984135897, "grad_norm": 0.3915923237800598, "learning_rate": 4.402182992900498e-06, "loss": 0.4673, "step": 4162 }, { "epoch": 1.7533342692685667, "grad_norm": 0.39287805557250977, "learning_rate": 4.399749723697613e-06, "loss": 0.4242, "step": 4163 }, { "epoch": 1.7537554401235433, "grad_norm": 0.41790589690208435, "learning_rate": 4.397316598723385e-06, "loss": 0.446, "step": 4164 }, { "epoch": 1.7541766109785204, "grad_norm": 0.4479522407054901, "learning_rate": 4.394883618562449e-06, "loss": 0.455, "step": 4165 }, { "epoch": 1.754597781833497, "grad_norm": 0.39168328046798706, "learning_rate": 4.392450783799403e-06, "loss": 0.4359, "step": 4166 }, { "epoch": 1.755018952688474, "grad_norm": 0.4304407835006714, "learning_rate": 4.390018095018809e-06, "loss": 0.4178, "step": 4167 }, { "epoch": 1.7554401235434507, "grad_norm": 0.38943952322006226, "learning_rate": 4.3875855528052e-06, "loss": 0.4286, "step": 4168 }, { "epoch": 1.7558612943984278, "grad_norm": 0.4280793368816376, "learning_rate": 4.385153157743064e-06, "loss": 0.4485, "step": 4169 }, { "epoch": 1.7562824652534044, "grad_norm": 0.39325493574142456, "learning_rate": 4.382720910416861e-06, "loss": 0.4073, "step": 4170 }, { "epoch": 1.7567036361083814, "grad_norm": 0.38709476590156555, "learning_rate": 4.3802888114110155e-06, "loss": 0.4638, "step": 4171 }, { "epoch": 1.757124806963358, "grad_norm": 0.40316012501716614, "learning_rate": 4.377856861309912e-06, "loss": 0.4129, "step": 4172 }, { "epoch": 1.757545977818335, "grad_norm": 0.445727676153183, "learning_rate": 4.375425060697905e-06, "loss": 0.4434, "step": 4173 }, { "epoch": 1.7579671486733117, "grad_norm": 0.433190256357193, "learning_rate": 4.372993410159302e-06, "loss": 0.4589, "step": 4174 }, { "epoch": 1.7583883195282888, "grad_norm": 0.36866554617881775, "learning_rate": 4.37056191027839e-06, "loss": 0.3722, "step": 4175 }, { "epoch": 1.7588094903832654, "grad_norm": 0.4483208954334259, "learning_rate": 4.368130561639409e-06, "loss": 0.4593, "step": 4176 }, { "epoch": 1.7592306612382425, "grad_norm": 0.4539377987384796, "learning_rate": 4.365699364826565e-06, "loss": 0.4744, "step": 4177 }, { "epoch": 1.759651832093219, "grad_norm": 0.4079117476940155, "learning_rate": 4.36326832042403e-06, "loss": 0.3897, "step": 4178 }, { "epoch": 1.7600730029481961, "grad_norm": 0.4611620604991913, "learning_rate": 4.360837429015935e-06, "loss": 0.4938, "step": 4179 }, { "epoch": 1.7604941738031727, "grad_norm": 0.4056277573108673, "learning_rate": 4.358406691186377e-06, "loss": 0.4072, "step": 4180 }, { "epoch": 1.7609153446581498, "grad_norm": 0.4236477315425873, "learning_rate": 4.3559761075194185e-06, "loss": 0.3994, "step": 4181 }, { "epoch": 1.7613365155131264, "grad_norm": 0.4955408275127411, "learning_rate": 4.353545678599079e-06, "loss": 0.5223, "step": 4182 }, { "epoch": 1.7617576863681035, "grad_norm": 0.3848971724510193, "learning_rate": 4.3511154050093475e-06, "loss": 0.3936, "step": 4183 }, { "epoch": 1.76217885722308, "grad_norm": 0.42799708247184753, "learning_rate": 4.348685287334168e-06, "loss": 0.4508, "step": 4184 }, { "epoch": 1.7626000280780572, "grad_norm": 0.46295008063316345, "learning_rate": 4.346255326157454e-06, "loss": 0.4592, "step": 4185 }, { "epoch": 1.7630211989330338, "grad_norm": 0.47357791662216187, "learning_rate": 4.343825522063078e-06, "loss": 0.4391, "step": 4186 }, { "epoch": 1.7634423697880108, "grad_norm": 0.47226276993751526, "learning_rate": 4.341395875634875e-06, "loss": 0.4567, "step": 4187 }, { "epoch": 1.7638635406429874, "grad_norm": 0.44550931453704834, "learning_rate": 4.338966387456646e-06, "loss": 0.4345, "step": 4188 }, { "epoch": 1.7642847114979643, "grad_norm": 0.4057380259037018, "learning_rate": 4.336537058112147e-06, "loss": 0.468, "step": 4189 }, { "epoch": 1.7647058823529411, "grad_norm": 0.3926047086715698, "learning_rate": 4.3341078881850966e-06, "loss": 0.4437, "step": 4190 }, { "epoch": 1.765127053207918, "grad_norm": 0.40493690967559814, "learning_rate": 4.331678878259184e-06, "loss": 0.4135, "step": 4191 }, { "epoch": 1.7655482240628948, "grad_norm": 0.4351728856563568, "learning_rate": 4.3292500289180505e-06, "loss": 0.4786, "step": 4192 }, { "epoch": 1.7659693949178716, "grad_norm": 0.40757372975349426, "learning_rate": 4.326821340745304e-06, "loss": 0.418, "step": 4193 }, { "epoch": 1.7663905657728485, "grad_norm": 0.43382710218429565, "learning_rate": 4.324392814324509e-06, "loss": 0.4375, "step": 4194 }, { "epoch": 1.7668117366278253, "grad_norm": 0.4206988513469696, "learning_rate": 4.321964450239197e-06, "loss": 0.4298, "step": 4195 }, { "epoch": 1.7672329074828022, "grad_norm": 0.4147372841835022, "learning_rate": 4.3195362490728546e-06, "loss": 0.44, "step": 4196 }, { "epoch": 1.767654078337779, "grad_norm": 0.3837607204914093, "learning_rate": 4.317108211408934e-06, "loss": 0.4327, "step": 4197 }, { "epoch": 1.7680752491927558, "grad_norm": 0.4581671953201294, "learning_rate": 4.314680337830847e-06, "loss": 0.5218, "step": 4198 }, { "epoch": 1.7684964200477327, "grad_norm": 0.42213717103004456, "learning_rate": 4.312252628921962e-06, "loss": 0.4091, "step": 4199 }, { "epoch": 1.7689175909027095, "grad_norm": 0.432589054107666, "learning_rate": 4.309825085265613e-06, "loss": 0.4383, "step": 4200 }, { "epoch": 1.7693387617576863, "grad_norm": 0.41499996185302734, "learning_rate": 4.307397707445092e-06, "loss": 0.4634, "step": 4201 }, { "epoch": 1.7697599326126632, "grad_norm": 0.4378148913383484, "learning_rate": 4.304970496043652e-06, "loss": 0.4367, "step": 4202 }, { "epoch": 1.77018110346764, "grad_norm": 0.3981338441371918, "learning_rate": 4.302543451644506e-06, "loss": 0.4117, "step": 4203 }, { "epoch": 1.7706022743226169, "grad_norm": 0.41651517152786255, "learning_rate": 4.300116574830824e-06, "loss": 0.401, "step": 4204 }, { "epoch": 1.7710234451775937, "grad_norm": 0.4769996702671051, "learning_rate": 4.297689866185739e-06, "loss": 0.5029, "step": 4205 }, { "epoch": 1.7714446160325705, "grad_norm": 0.3947661221027374, "learning_rate": 4.295263326292344e-06, "loss": 0.3959, "step": 4206 }, { "epoch": 1.7718657868875474, "grad_norm": 0.39776119589805603, "learning_rate": 4.292836955733688e-06, "loss": 0.3736, "step": 4207 }, { "epoch": 1.7722869577425242, "grad_norm": 0.4550960958003998, "learning_rate": 4.2904107550927855e-06, "loss": 0.4449, "step": 4208 }, { "epoch": 1.772708128597501, "grad_norm": 0.4464071989059448, "learning_rate": 4.2879847249526016e-06, "loss": 0.439, "step": 4209 }, { "epoch": 1.7731292994524779, "grad_norm": 0.4745861887931824, "learning_rate": 4.285558865896065e-06, "loss": 0.4804, "step": 4210 }, { "epoch": 1.7735504703074547, "grad_norm": 0.3743687868118286, "learning_rate": 4.283133178506066e-06, "loss": 0.4036, "step": 4211 }, { "epoch": 1.7739716411624316, "grad_norm": 0.4337945878505707, "learning_rate": 4.28070766336545e-06, "loss": 0.4478, "step": 4212 }, { "epoch": 1.7743928120174084, "grad_norm": 0.4182162284851074, "learning_rate": 4.278282321057022e-06, "loss": 0.4591, "step": 4213 }, { "epoch": 1.7748139828723852, "grad_norm": 0.4459393620491028, "learning_rate": 4.275857152163544e-06, "loss": 0.4388, "step": 4214 }, { "epoch": 1.775235153727362, "grad_norm": 0.4315958321094513, "learning_rate": 4.273432157267739e-06, "loss": 0.4229, "step": 4215 }, { "epoch": 1.775656324582339, "grad_norm": 0.46281692385673523, "learning_rate": 4.271007336952286e-06, "loss": 0.4884, "step": 4216 }, { "epoch": 1.7760774954373157, "grad_norm": 0.4006381034851074, "learning_rate": 4.268582691799824e-06, "loss": 0.4151, "step": 4217 }, { "epoch": 1.7764986662922926, "grad_norm": 0.43503841757774353, "learning_rate": 4.266158222392949e-06, "loss": 0.4506, "step": 4218 }, { "epoch": 1.7769198371472694, "grad_norm": 0.4664459526538849, "learning_rate": 4.263733929314212e-06, "loss": 0.4317, "step": 4219 }, { "epoch": 1.7773410080022463, "grad_norm": 0.39449745416641235, "learning_rate": 4.261309813146127e-06, "loss": 0.4361, "step": 4220 }, { "epoch": 1.777762178857223, "grad_norm": 0.38550108671188354, "learning_rate": 4.2588858744711614e-06, "loss": 0.4581, "step": 4221 }, { "epoch": 1.7781833497122, "grad_norm": 0.4062041938304901, "learning_rate": 4.256462113871741e-06, "loss": 0.4375, "step": 4222 }, { "epoch": 1.7786045205671768, "grad_norm": 0.43904736638069153, "learning_rate": 4.254038531930253e-06, "loss": 0.4642, "step": 4223 }, { "epoch": 1.7790256914221536, "grad_norm": 0.44321489334106445, "learning_rate": 4.251615129229033e-06, "loss": 0.431, "step": 4224 }, { "epoch": 1.7794468622771304, "grad_norm": 0.4463675618171692, "learning_rate": 4.2491919063503785e-06, "loss": 0.4851, "step": 4225 }, { "epoch": 1.7798680331321073, "grad_norm": 0.45179301500320435, "learning_rate": 4.246768863876545e-06, "loss": 0.4511, "step": 4226 }, { "epoch": 1.7802892039870841, "grad_norm": 0.3859246075153351, "learning_rate": 4.244346002389744e-06, "loss": 0.4006, "step": 4227 }, { "epoch": 1.780710374842061, "grad_norm": 0.44094184041023254, "learning_rate": 4.241923322472144e-06, "loss": 0.4971, "step": 4228 }, { "epoch": 1.7811315456970378, "grad_norm": 0.3855718672275543, "learning_rate": 4.239500824705864e-06, "loss": 0.447, "step": 4229 }, { "epoch": 1.7815527165520146, "grad_norm": 0.38426917791366577, "learning_rate": 4.2370785096729875e-06, "loss": 0.4047, "step": 4230 }, { "epoch": 1.7819738874069915, "grad_norm": 0.4471796154975891, "learning_rate": 4.23465637795555e-06, "loss": 0.4894, "step": 4231 }, { "epoch": 1.7823950582619683, "grad_norm": 0.4105290472507477, "learning_rate": 4.232234430135542e-06, "loss": 0.3778, "step": 4232 }, { "epoch": 1.7828162291169452, "grad_norm": 0.4289083778858185, "learning_rate": 4.229812666794914e-06, "loss": 0.4249, "step": 4233 }, { "epoch": 1.783237399971922, "grad_norm": 0.44803404808044434, "learning_rate": 4.2273910885155655e-06, "loss": 0.4938, "step": 4234 }, { "epoch": 1.7836585708268988, "grad_norm": 0.3933154046535492, "learning_rate": 4.2249696958793575e-06, "loss": 0.4383, "step": 4235 }, { "epoch": 1.7840797416818757, "grad_norm": 0.40407446026802063, "learning_rate": 4.222548489468105e-06, "loss": 0.4247, "step": 4236 }, { "epoch": 1.7845009125368525, "grad_norm": 0.4282112717628479, "learning_rate": 4.220127469863577e-06, "loss": 0.4552, "step": 4237 }, { "epoch": 1.7849220833918293, "grad_norm": 0.4441573917865753, "learning_rate": 4.217706637647499e-06, "loss": 0.4569, "step": 4238 }, { "epoch": 1.785343254246806, "grad_norm": 0.39612698554992676, "learning_rate": 4.215285993401547e-06, "loss": 0.4385, "step": 4239 }, { "epoch": 1.785764425101783, "grad_norm": 0.4364543557167053, "learning_rate": 4.21286553770736e-06, "loss": 0.471, "step": 4240 }, { "epoch": 1.7861855959567596, "grad_norm": 0.4602329134941101, "learning_rate": 4.210445271146526e-06, "loss": 0.5011, "step": 4241 }, { "epoch": 1.7866067668117367, "grad_norm": 0.42551517486572266, "learning_rate": 4.208025194300588e-06, "loss": 0.4126, "step": 4242 }, { "epoch": 1.7870279376667133, "grad_norm": 0.4312095642089844, "learning_rate": 4.205605307751044e-06, "loss": 0.4234, "step": 4243 }, { "epoch": 1.7874491085216904, "grad_norm": 0.4320623278617859, "learning_rate": 4.2031856120793456e-06, "loss": 0.4294, "step": 4244 }, { "epoch": 1.787870279376667, "grad_norm": 0.44254615902900696, "learning_rate": 4.2007661078669014e-06, "loss": 0.4258, "step": 4245 }, { "epoch": 1.788291450231644, "grad_norm": 0.41148191690444946, "learning_rate": 4.19834679569507e-06, "loss": 0.434, "step": 4246 }, { "epoch": 1.7887126210866207, "grad_norm": 0.4423052966594696, "learning_rate": 4.19592767614517e-06, "loss": 0.4771, "step": 4247 }, { "epoch": 1.7891337919415977, "grad_norm": 0.37916508316993713, "learning_rate": 4.193508749798465e-06, "loss": 0.3922, "step": 4248 }, { "epoch": 1.7895549627965743, "grad_norm": 0.4448389410972595, "learning_rate": 4.191090017236177e-06, "loss": 0.4807, "step": 4249 }, { "epoch": 1.7899761336515514, "grad_norm": 0.39931127429008484, "learning_rate": 4.1886714790394825e-06, "loss": 0.448, "step": 4250 }, { "epoch": 1.790397304506528, "grad_norm": 0.41169536113739014, "learning_rate": 4.186253135789511e-06, "loss": 0.4039, "step": 4251 }, { "epoch": 1.790818475361505, "grad_norm": 0.39729735255241394, "learning_rate": 4.183834988067344e-06, "loss": 0.4238, "step": 4252 }, { "epoch": 1.7912396462164817, "grad_norm": 0.4041295349597931, "learning_rate": 4.181417036454014e-06, "loss": 0.3685, "step": 4253 }, { "epoch": 1.7916608170714587, "grad_norm": 0.44523152709007263, "learning_rate": 4.178999281530509e-06, "loss": 0.5055, "step": 4254 }, { "epoch": 1.7920819879264354, "grad_norm": 0.38854965567588806, "learning_rate": 4.176581723877771e-06, "loss": 0.4091, "step": 4255 }, { "epoch": 1.7925031587814124, "grad_norm": 0.40481096506118774, "learning_rate": 4.17416436407669e-06, "loss": 0.4172, "step": 4256 }, { "epoch": 1.792924329636389, "grad_norm": 0.4273054599761963, "learning_rate": 4.1717472027081154e-06, "loss": 0.4468, "step": 4257 }, { "epoch": 1.793345500491366, "grad_norm": 0.4338119328022003, "learning_rate": 4.169330240352841e-06, "loss": 0.4579, "step": 4258 }, { "epoch": 1.7937666713463427, "grad_norm": 0.4025488793849945, "learning_rate": 4.166913477591619e-06, "loss": 0.4306, "step": 4259 }, { "epoch": 1.7941878422013198, "grad_norm": 0.4333899915218353, "learning_rate": 4.1644969150051496e-06, "loss": 0.4517, "step": 4260 }, { "epoch": 1.7946090130562964, "grad_norm": 0.411192387342453, "learning_rate": 4.162080553174087e-06, "loss": 0.3931, "step": 4261 }, { "epoch": 1.7950301839112734, "grad_norm": 0.42779725790023804, "learning_rate": 4.159664392679039e-06, "loss": 0.4883, "step": 4262 }, { "epoch": 1.79545135476625, "grad_norm": 0.43935468792915344, "learning_rate": 4.157248434100559e-06, "loss": 0.4872, "step": 4263 }, { "epoch": 1.7958725256212271, "grad_norm": 0.3882574141025543, "learning_rate": 4.154832678019157e-06, "loss": 0.4084, "step": 4264 }, { "epoch": 1.7962936964762037, "grad_norm": 0.42259159684181213, "learning_rate": 4.152417125015294e-06, "loss": 0.4434, "step": 4265 }, { "epoch": 1.7967148673311808, "grad_norm": 0.43243083357810974, "learning_rate": 4.15000177566938e-06, "loss": 0.4492, "step": 4266 }, { "epoch": 1.7971360381861574, "grad_norm": 0.4408474266529083, "learning_rate": 4.14758663056178e-06, "loss": 0.4677, "step": 4267 }, { "epoch": 1.7975572090411345, "grad_norm": 0.4411344528198242, "learning_rate": 4.145171690272803e-06, "loss": 0.4441, "step": 4268 }, { "epoch": 1.797978379896111, "grad_norm": 0.40269389748573303, "learning_rate": 4.142756955382714e-06, "loss": 0.43, "step": 4269 }, { "epoch": 1.7983995507510881, "grad_norm": 0.3650054335594177, "learning_rate": 4.14034242647173e-06, "loss": 0.3951, "step": 4270 }, { "epoch": 1.7988207216060648, "grad_norm": 0.42359307408332825, "learning_rate": 4.1379281041200145e-06, "loss": 0.4358, "step": 4271 }, { "epoch": 1.7992418924610418, "grad_norm": 0.4105669856071472, "learning_rate": 4.135513988907684e-06, "loss": 0.4286, "step": 4272 }, { "epoch": 1.7996630633160184, "grad_norm": 0.42164891958236694, "learning_rate": 4.133100081414802e-06, "loss": 0.4313, "step": 4273 }, { "epoch": 1.8000842341709955, "grad_norm": 0.46802419424057007, "learning_rate": 4.130686382221386e-06, "loss": 0.4986, "step": 4274 }, { "epoch": 1.8005054050259721, "grad_norm": 0.41985023021698, "learning_rate": 4.128272891907401e-06, "loss": 0.4349, "step": 4275 }, { "epoch": 1.8009265758809492, "grad_norm": 0.4296795725822449, "learning_rate": 4.125859611052762e-06, "loss": 0.4447, "step": 4276 }, { "epoch": 1.8013477467359258, "grad_norm": 0.4688481390476227, "learning_rate": 4.123446540237338e-06, "loss": 0.5145, "step": 4277 }, { "epoch": 1.8017689175909029, "grad_norm": 0.3731391727924347, "learning_rate": 4.121033680040939e-06, "loss": 0.3623, "step": 4278 }, { "epoch": 1.8021900884458795, "grad_norm": 0.4506429135799408, "learning_rate": 4.11862103104333e-06, "loss": 0.5238, "step": 4279 }, { "epoch": 1.8026112593008565, "grad_norm": 0.4437498450279236, "learning_rate": 4.116208593824227e-06, "loss": 0.4449, "step": 4280 }, { "epoch": 1.8030324301558331, "grad_norm": 0.3932585120201111, "learning_rate": 4.11379636896329e-06, "loss": 0.4234, "step": 4281 }, { "epoch": 1.8034536010108102, "grad_norm": 0.4322595000267029, "learning_rate": 4.1113843570401325e-06, "loss": 0.418, "step": 4282 }, { "epoch": 1.8038747718657868, "grad_norm": 0.44118866324424744, "learning_rate": 4.108972558634312e-06, "loss": 0.4083, "step": 4283 }, { "epoch": 1.8042959427207639, "grad_norm": 0.46155840158462524, "learning_rate": 4.10656097432534e-06, "loss": 0.4773, "step": 4284 }, { "epoch": 1.8047171135757405, "grad_norm": 0.3758218586444855, "learning_rate": 4.104149604692672e-06, "loss": 0.3683, "step": 4285 }, { "epoch": 1.8051382844307176, "grad_norm": 0.41004782915115356, "learning_rate": 4.101738450315717e-06, "loss": 0.4629, "step": 4286 }, { "epoch": 1.8055594552856942, "grad_norm": 0.4415796101093292, "learning_rate": 4.099327511773828e-06, "loss": 0.4214, "step": 4287 }, { "epoch": 1.8059806261406712, "grad_norm": 0.4726293385028839, "learning_rate": 4.096916789646305e-06, "loss": 0.4773, "step": 4288 }, { "epoch": 1.8064017969956478, "grad_norm": 0.41273999214172363, "learning_rate": 4.0945062845124014e-06, "loss": 0.3983, "step": 4289 }, { "epoch": 1.8068229678506247, "grad_norm": 0.40670308470726013, "learning_rate": 4.092095996951314e-06, "loss": 0.417, "step": 4290 }, { "epoch": 1.8072441387056015, "grad_norm": 0.3927420377731323, "learning_rate": 4.0896859275421895e-06, "loss": 0.4369, "step": 4291 }, { "epoch": 1.8076653095605784, "grad_norm": 0.4326569437980652, "learning_rate": 4.0872760768641225e-06, "loss": 0.4513, "step": 4292 }, { "epoch": 1.8080864804155552, "grad_norm": 0.44175976514816284, "learning_rate": 4.084866445496151e-06, "loss": 0.4192, "step": 4293 }, { "epoch": 1.808507651270532, "grad_norm": 0.5130192637443542, "learning_rate": 4.082457034017265e-06, "loss": 0.4748, "step": 4294 }, { "epoch": 1.8089288221255089, "grad_norm": 0.45452237129211426, "learning_rate": 4.080047843006401e-06, "loss": 0.4501, "step": 4295 }, { "epoch": 1.8093499929804857, "grad_norm": 0.4337131381034851, "learning_rate": 4.077638873042439e-06, "loss": 0.4365, "step": 4296 }, { "epoch": 1.8097711638354625, "grad_norm": 0.3918549716472626, "learning_rate": 4.075230124704212e-06, "loss": 0.3962, "step": 4297 }, { "epoch": 1.8101923346904394, "grad_norm": 0.431852251291275, "learning_rate": 4.072821598570493e-06, "loss": 0.4801, "step": 4298 }, { "epoch": 1.8106135055454162, "grad_norm": 0.39578402042388916, "learning_rate": 4.0704132952200055e-06, "loss": 0.4157, "step": 4299 }, { "epoch": 1.811034676400393, "grad_norm": 0.4254263639450073, "learning_rate": 4.0680052152314185e-06, "loss": 0.4695, "step": 4300 }, { "epoch": 1.81145584725537, "grad_norm": 0.4207795262336731, "learning_rate": 4.065597359183348e-06, "loss": 0.434, "step": 4301 }, { "epoch": 1.8118770181103467, "grad_norm": 0.44708147644996643, "learning_rate": 4.063189727654358e-06, "loss": 0.4604, "step": 4302 }, { "epoch": 1.8122981889653236, "grad_norm": 0.3640076816082001, "learning_rate": 4.060782321222953e-06, "loss": 0.3944, "step": 4303 }, { "epoch": 1.8127193598203004, "grad_norm": 0.43330785632133484, "learning_rate": 4.058375140467588e-06, "loss": 0.423, "step": 4304 }, { "epoch": 1.8131405306752773, "grad_norm": 0.4399455785751343, "learning_rate": 4.05596818596666e-06, "loss": 0.44, "step": 4305 }, { "epoch": 1.813561701530254, "grad_norm": 0.4499785304069519, "learning_rate": 4.053561458298518e-06, "loss": 0.4113, "step": 4306 }, { "epoch": 1.813982872385231, "grad_norm": 0.4174349308013916, "learning_rate": 4.0511549580414535e-06, "loss": 0.4522, "step": 4307 }, { "epoch": 1.8144040432402078, "grad_norm": 0.4152759611606598, "learning_rate": 4.048748685773696e-06, "loss": 0.4381, "step": 4308 }, { "epoch": 1.8148252140951846, "grad_norm": 0.48462969064712524, "learning_rate": 4.046342642073433e-06, "loss": 0.4271, "step": 4309 }, { "epoch": 1.8152463849501614, "grad_norm": 0.36140355467796326, "learning_rate": 4.043936827518788e-06, "loss": 0.3969, "step": 4310 }, { "epoch": 1.8156675558051383, "grad_norm": 0.4153679311275482, "learning_rate": 4.041531242687832e-06, "loss": 0.4214, "step": 4311 }, { "epoch": 1.8160887266601151, "grad_norm": 0.4214799106121063, "learning_rate": 4.039125888158583e-06, "loss": 0.4821, "step": 4312 }, { "epoch": 1.816509897515092, "grad_norm": 0.38620930910110474, "learning_rate": 4.036720764508999e-06, "loss": 0.396, "step": 4313 }, { "epoch": 1.8169310683700688, "grad_norm": 0.40663379430770874, "learning_rate": 4.034315872316988e-06, "loss": 0.4135, "step": 4314 }, { "epoch": 1.8173522392250456, "grad_norm": 0.43015843629837036, "learning_rate": 4.0319112121603974e-06, "loss": 0.4687, "step": 4315 }, { "epoch": 1.8177734100800225, "grad_norm": 0.4441182017326355, "learning_rate": 4.029506784617021e-06, "loss": 0.4272, "step": 4316 }, { "epoch": 1.8181945809349993, "grad_norm": 0.4542538523674011, "learning_rate": 4.0271025902646e-06, "loss": 0.4652, "step": 4317 }, { "epoch": 1.8186157517899761, "grad_norm": 0.39740169048309326, "learning_rate": 4.024698629680811e-06, "loss": 0.4114, "step": 4318 }, { "epoch": 1.819036922644953, "grad_norm": 0.41273394227027893, "learning_rate": 4.022294903443284e-06, "loss": 0.4012, "step": 4319 }, { "epoch": 1.8194580934999298, "grad_norm": 0.43537789583206177, "learning_rate": 4.019891412129586e-06, "loss": 0.393, "step": 4320 }, { "epoch": 1.8198792643549067, "grad_norm": 0.39410650730133057, "learning_rate": 4.017488156317231e-06, "loss": 0.4208, "step": 4321 }, { "epoch": 1.8203004352098835, "grad_norm": 0.44603827595710754, "learning_rate": 4.015085136583678e-06, "loss": 0.4827, "step": 4322 }, { "epoch": 1.8207216060648603, "grad_norm": 0.381671667098999, "learning_rate": 4.012682353506322e-06, "loss": 0.375, "step": 4323 }, { "epoch": 1.8211427769198372, "grad_norm": 0.40570759773254395, "learning_rate": 4.0102798076625065e-06, "loss": 0.428, "step": 4324 }, { "epoch": 1.821563947774814, "grad_norm": 0.3651161193847656, "learning_rate": 4.00787749962952e-06, "loss": 0.3937, "step": 4325 }, { "epoch": 1.8219851186297908, "grad_norm": 0.44024530053138733, "learning_rate": 4.00547542998459e-06, "loss": 0.475, "step": 4326 }, { "epoch": 1.8224062894847677, "grad_norm": 0.47656700015068054, "learning_rate": 4.003073599304889e-06, "loss": 0.4548, "step": 4327 }, { "epoch": 1.8228274603397445, "grad_norm": 0.46280208230018616, "learning_rate": 4.000672008167527e-06, "loss": 0.441, "step": 4328 }, { "epoch": 1.8232486311947214, "grad_norm": 0.46965667605400085, "learning_rate": 3.998270657149564e-06, "loss": 0.4631, "step": 4329 }, { "epoch": 1.8236698020496982, "grad_norm": 0.4018886089324951, "learning_rate": 3.995869546827999e-06, "loss": 0.4158, "step": 4330 }, { "epoch": 1.824090972904675, "grad_norm": 0.4143933951854706, "learning_rate": 3.99346867777977e-06, "loss": 0.4774, "step": 4331 }, { "epoch": 1.8245121437596519, "grad_norm": 0.4665737748146057, "learning_rate": 3.991068050581763e-06, "loss": 0.5261, "step": 4332 }, { "epoch": 1.8249333146146287, "grad_norm": 0.4185100793838501, "learning_rate": 3.988667665810801e-06, "loss": 0.4654, "step": 4333 }, { "epoch": 1.8253544854696055, "grad_norm": 0.4216306507587433, "learning_rate": 3.98626752404365e-06, "loss": 0.4601, "step": 4334 }, { "epoch": 1.8257756563245824, "grad_norm": 0.38871318101882935, "learning_rate": 3.983867625857019e-06, "loss": 0.3964, "step": 4335 }, { "epoch": 1.8261968271795592, "grad_norm": 0.44953855872154236, "learning_rate": 3.981467971827559e-06, "loss": 0.4723, "step": 4336 }, { "epoch": 1.826617998034536, "grad_norm": 0.39086902141571045, "learning_rate": 3.979068562531859e-06, "loss": 0.4235, "step": 4337 }, { "epoch": 1.827039168889513, "grad_norm": 0.39729565382003784, "learning_rate": 3.976669398546451e-06, "loss": 0.3563, "step": 4338 }, { "epoch": 1.8274603397444897, "grad_norm": 0.44592610001564026, "learning_rate": 3.974270480447809e-06, "loss": 0.5066, "step": 4339 }, { "epoch": 1.8278815105994664, "grad_norm": 0.41020503640174866, "learning_rate": 3.971871808812347e-06, "loss": 0.442, "step": 4340 }, { "epoch": 1.8283026814544434, "grad_norm": 0.47400328516960144, "learning_rate": 3.969473384216422e-06, "loss": 0.4379, "step": 4341 }, { "epoch": 1.82872385230942, "grad_norm": 0.4130297601222992, "learning_rate": 3.9670752072363265e-06, "loss": 0.4493, "step": 4342 }, { "epoch": 1.829145023164397, "grad_norm": 0.41758376359939575, "learning_rate": 3.964677278448296e-06, "loss": 0.4394, "step": 4343 }, { "epoch": 1.8295661940193737, "grad_norm": 0.41038721799850464, "learning_rate": 3.962279598428508e-06, "loss": 0.4525, "step": 4344 }, { "epoch": 1.8299873648743508, "grad_norm": 0.3917198181152344, "learning_rate": 3.959882167753081e-06, "loss": 0.4558, "step": 4345 }, { "epoch": 1.8304085357293274, "grad_norm": 0.449878990650177, "learning_rate": 3.957484986998073e-06, "loss": 0.4909, "step": 4346 }, { "epoch": 1.8308297065843044, "grad_norm": 0.41951918601989746, "learning_rate": 3.9550880567394745e-06, "loss": 0.4205, "step": 4347 }, { "epoch": 1.831250877439281, "grad_norm": 0.38563328981399536, "learning_rate": 3.952691377553226e-06, "loss": 0.4239, "step": 4348 }, { "epoch": 1.8316720482942581, "grad_norm": 0.38856640458106995, "learning_rate": 3.950294950015204e-06, "loss": 0.434, "step": 4349 }, { "epoch": 1.8320932191492347, "grad_norm": 0.43636462092399597, "learning_rate": 3.947898774701223e-06, "loss": 0.4815, "step": 4350 }, { "epoch": 1.8325143900042118, "grad_norm": 0.41170641779899597, "learning_rate": 3.94550285218704e-06, "loss": 0.4345, "step": 4351 }, { "epoch": 1.8329355608591884, "grad_norm": 0.42833250761032104, "learning_rate": 3.943107183048345e-06, "loss": 0.4255, "step": 4352 }, { "epoch": 1.8333567317141655, "grad_norm": 0.3880200982093811, "learning_rate": 3.940711767860776e-06, "loss": 0.4328, "step": 4353 }, { "epoch": 1.833777902569142, "grad_norm": 0.44221845269203186, "learning_rate": 3.938316607199903e-06, "loss": 0.4455, "step": 4354 }, { "epoch": 1.8341990734241191, "grad_norm": 0.4222584068775177, "learning_rate": 3.9359217016412375e-06, "loss": 0.4354, "step": 4355 }, { "epoch": 1.8346202442790958, "grad_norm": 0.4412645101547241, "learning_rate": 3.933527051760232e-06, "loss": 0.4673, "step": 4356 }, { "epoch": 1.8350414151340728, "grad_norm": 0.40408650040626526, "learning_rate": 3.931132658132272e-06, "loss": 0.414, "step": 4357 }, { "epoch": 1.8354625859890494, "grad_norm": 0.3918989598751068, "learning_rate": 3.9287385213326845e-06, "loss": 0.4134, "step": 4358 }, { "epoch": 1.8358837568440265, "grad_norm": 0.4158703088760376, "learning_rate": 3.926344641936737e-06, "loss": 0.4452, "step": 4359 }, { "epoch": 1.836304927699003, "grad_norm": 0.4498731195926666, "learning_rate": 3.92395102051963e-06, "loss": 0.4317, "step": 4360 }, { "epoch": 1.8367260985539802, "grad_norm": 0.500850260257721, "learning_rate": 3.92155765765651e-06, "loss": 0.4646, "step": 4361 }, { "epoch": 1.8371472694089568, "grad_norm": 0.44631272554397583, "learning_rate": 3.9191645539224495e-06, "loss": 0.4112, "step": 4362 }, { "epoch": 1.8375684402639338, "grad_norm": 0.40096601843833923, "learning_rate": 3.9167717098924704e-06, "loss": 0.4071, "step": 4363 }, { "epoch": 1.8379896111189105, "grad_norm": 0.4169367849826813, "learning_rate": 3.914379126141524e-06, "loss": 0.4722, "step": 4364 }, { "epoch": 1.8384107819738875, "grad_norm": 0.40377354621887207, "learning_rate": 3.911986803244507e-06, "loss": 0.4293, "step": 4365 }, { "epoch": 1.8388319528288641, "grad_norm": 0.40541908144950867, "learning_rate": 3.9095947417762465e-06, "loss": 0.3898, "step": 4366 }, { "epoch": 1.8392531236838412, "grad_norm": 0.44926488399505615, "learning_rate": 3.907202942311506e-06, "loss": 0.4692, "step": 4367 }, { "epoch": 1.8396742945388178, "grad_norm": 0.42274054884910583, "learning_rate": 3.904811405424993e-06, "loss": 0.4189, "step": 4368 }, { "epoch": 1.8400954653937949, "grad_norm": 0.4125973582267761, "learning_rate": 3.902420131691347e-06, "loss": 0.4041, "step": 4369 }, { "epoch": 1.8405166362487715, "grad_norm": 2.3407938480377197, "learning_rate": 3.900029121685146e-06, "loss": 0.6277, "step": 4370 }, { "epoch": 1.8409378071037485, "grad_norm": 0.3628462851047516, "learning_rate": 3.897638375980904e-06, "loss": 0.3535, "step": 4371 }, { "epoch": 1.8413589779587252, "grad_norm": 0.5092913508415222, "learning_rate": 3.895247895153069e-06, "loss": 0.5351, "step": 4372 }, { "epoch": 1.8417801488137022, "grad_norm": 0.3548806309700012, "learning_rate": 3.89285767977603e-06, "loss": 0.359, "step": 4373 }, { "epoch": 1.8422013196686788, "grad_norm": 0.4499486982822418, "learning_rate": 3.890467730424109e-06, "loss": 0.4292, "step": 4374 }, { "epoch": 1.842622490523656, "grad_norm": 0.438240647315979, "learning_rate": 3.8880780476715664e-06, "loss": 0.4654, "step": 4375 }, { "epoch": 1.8430436613786325, "grad_norm": 0.4233414828777313, "learning_rate": 3.885688632092598e-06, "loss": 0.4084, "step": 4376 }, { "epoch": 1.8434648322336096, "grad_norm": 0.4256945550441742, "learning_rate": 3.883299484261332e-06, "loss": 0.4022, "step": 4377 }, { "epoch": 1.8438860030885862, "grad_norm": 0.38156604766845703, "learning_rate": 3.880910604751834e-06, "loss": 0.4122, "step": 4378 }, { "epoch": 1.8443071739435632, "grad_norm": 0.42203661799430847, "learning_rate": 3.878521994138111e-06, "loss": 0.4283, "step": 4379 }, { "epoch": 1.8447283447985399, "grad_norm": 0.3798612952232361, "learning_rate": 3.876133652994094e-06, "loss": 0.4079, "step": 4380 }, { "epoch": 1.845149515653517, "grad_norm": 0.3994918167591095, "learning_rate": 3.873745581893664e-06, "loss": 0.4228, "step": 4381 }, { "epoch": 1.8455706865084935, "grad_norm": 0.3887300491333008, "learning_rate": 3.871357781410621e-06, "loss": 0.4094, "step": 4382 }, { "epoch": 1.8459918573634706, "grad_norm": 0.45867136120796204, "learning_rate": 3.868970252118711e-06, "loss": 0.449, "step": 4383 }, { "epoch": 1.8464130282184472, "grad_norm": 0.4428192675113678, "learning_rate": 3.866582994591611e-06, "loss": 0.4306, "step": 4384 }, { "epoch": 1.8468341990734243, "grad_norm": 0.43239325284957886, "learning_rate": 3.864196009402935e-06, "loss": 0.4627, "step": 4385 }, { "epoch": 1.847255369928401, "grad_norm": 0.3996829688549042, "learning_rate": 3.861809297126229e-06, "loss": 0.4327, "step": 4386 }, { "epoch": 1.847676540783378, "grad_norm": 0.4515780210494995, "learning_rate": 3.859422858334972e-06, "loss": 0.4513, "step": 4387 }, { "epoch": 1.8480977116383546, "grad_norm": 0.44400879740715027, "learning_rate": 3.857036693602582e-06, "loss": 0.4584, "step": 4388 }, { "epoch": 1.8485188824933316, "grad_norm": 0.4243217706680298, "learning_rate": 3.854650803502409e-06, "loss": 0.4165, "step": 4389 }, { "epoch": 1.8489400533483082, "grad_norm": 0.4505618214607239, "learning_rate": 3.852265188607736e-06, "loss": 0.3927, "step": 4390 }, { "epoch": 1.849361224203285, "grad_norm": 0.4079761803150177, "learning_rate": 3.849879849491781e-06, "loss": 0.4091, "step": 4391 }, { "epoch": 1.849782395058262, "grad_norm": 0.46936163306236267, "learning_rate": 3.847494786727695e-06, "loss": 0.4662, "step": 4392 }, { "epoch": 1.8502035659132388, "grad_norm": 0.45451050996780396, "learning_rate": 3.845110000888562e-06, "loss": 0.4699, "step": 4393 }, { "epoch": 1.8506247367682156, "grad_norm": 0.4154779314994812, "learning_rate": 3.842725492547401e-06, "loss": 0.4061, "step": 4394 }, { "epoch": 1.8510459076231924, "grad_norm": 0.4405120015144348, "learning_rate": 3.840341262277164e-06, "loss": 0.3992, "step": 4395 }, { "epoch": 1.8514670784781693, "grad_norm": 0.4556160867214203, "learning_rate": 3.837957310650738e-06, "loss": 0.4627, "step": 4396 }, { "epoch": 1.851888249333146, "grad_norm": 0.43396317958831787, "learning_rate": 3.835573638240937e-06, "loss": 0.422, "step": 4397 }, { "epoch": 1.852309420188123, "grad_norm": 0.4415876865386963, "learning_rate": 3.8331902456205135e-06, "loss": 0.4471, "step": 4398 }, { "epoch": 1.8527305910430998, "grad_norm": 0.42066094279289246, "learning_rate": 3.8308071333621525e-06, "loss": 0.4089, "step": 4399 }, { "epoch": 1.8531517618980766, "grad_norm": 0.43338432908058167, "learning_rate": 3.828424302038468e-06, "loss": 0.4713, "step": 4400 }, { "epoch": 1.8535729327530535, "grad_norm": 0.41709810495376587, "learning_rate": 3.8260417522220126e-06, "loss": 0.3814, "step": 4401 }, { "epoch": 1.8539941036080303, "grad_norm": 0.4549160599708557, "learning_rate": 3.823659484485264e-06, "loss": 0.4976, "step": 4402 }, { "epoch": 1.8544152744630071, "grad_norm": 0.4071897566318512, "learning_rate": 3.821277499400634e-06, "loss": 0.4114, "step": 4403 }, { "epoch": 1.854836445317984, "grad_norm": 0.4426575303077698, "learning_rate": 3.818895797540472e-06, "loss": 0.4734, "step": 4404 }, { "epoch": 1.8552576161729608, "grad_norm": 0.4330425262451172, "learning_rate": 3.8165143794770544e-06, "loss": 0.4204, "step": 4405 }, { "epoch": 1.8556787870279376, "grad_norm": 0.46029847860336304, "learning_rate": 3.81413324578259e-06, "loss": 0.526, "step": 4406 }, { "epoch": 1.8560999578829145, "grad_norm": 0.40895000100135803, "learning_rate": 3.8117523970292193e-06, "loss": 0.3815, "step": 4407 }, { "epoch": 1.8565211287378913, "grad_norm": 0.4416244328022003, "learning_rate": 3.8093718337890147e-06, "loss": 0.4806, "step": 4408 }, { "epoch": 1.8569422995928682, "grad_norm": 0.4426153004169464, "learning_rate": 3.80699155663398e-06, "loss": 0.4738, "step": 4409 }, { "epoch": 1.857363470447845, "grad_norm": 0.43880584836006165, "learning_rate": 3.8046115661360506e-06, "loss": 0.4432, "step": 4410 }, { "epoch": 1.8577846413028218, "grad_norm": 0.4037121534347534, "learning_rate": 3.802231862867094e-06, "loss": 0.4192, "step": 4411 }, { "epoch": 1.8582058121577987, "grad_norm": 0.41968289017677307, "learning_rate": 3.799852447398904e-06, "loss": 0.479, "step": 4412 }, { "epoch": 1.8586269830127755, "grad_norm": 0.45245134830474854, "learning_rate": 3.7974733203032114e-06, "loss": 0.4526, "step": 4413 }, { "epoch": 1.8590481538677524, "grad_norm": 0.4108940660953522, "learning_rate": 3.795094482151674e-06, "loss": 0.4338, "step": 4414 }, { "epoch": 1.8594693247227292, "grad_norm": 0.4036713242530823, "learning_rate": 3.7927159335158814e-06, "loss": 0.4374, "step": 4415 }, { "epoch": 1.859890495577706, "grad_norm": 0.41545483469963074, "learning_rate": 3.7903376749673536e-06, "loss": 0.4587, "step": 4416 }, { "epoch": 1.8603116664326829, "grad_norm": 0.4276694655418396, "learning_rate": 3.78795970707754e-06, "loss": 0.4099, "step": 4417 }, { "epoch": 1.8607328372876597, "grad_norm": 0.46189823746681213, "learning_rate": 3.7855820304178202e-06, "loss": 0.5125, "step": 4418 }, { "epoch": 1.8611540081426365, "grad_norm": 0.4440730810165405, "learning_rate": 3.783204645559504e-06, "loss": 0.4349, "step": 4419 }, { "epoch": 1.8615751789976134, "grad_norm": 0.42293280363082886, "learning_rate": 3.7808275530738344e-06, "loss": 0.4118, "step": 4420 }, { "epoch": 1.8619963498525902, "grad_norm": 0.462079793214798, "learning_rate": 3.7784507535319815e-06, "loss": 0.4669, "step": 4421 }, { "epoch": 1.862417520707567, "grad_norm": 0.3939244747161865, "learning_rate": 3.7760742475050404e-06, "loss": 0.3989, "step": 4422 }, { "epoch": 1.862838691562544, "grad_norm": 0.4589352607727051, "learning_rate": 3.773698035564041e-06, "loss": 0.488, "step": 4423 }, { "epoch": 1.8632598624175207, "grad_norm": 0.43244150280952454, "learning_rate": 3.7713221182799446e-06, "loss": 0.4258, "step": 4424 }, { "epoch": 1.8636810332724976, "grad_norm": 0.4366668462753296, "learning_rate": 3.7689464962236367e-06, "loss": 0.434, "step": 4425 }, { "epoch": 1.8641022041274744, "grad_norm": 0.4119202494621277, "learning_rate": 3.7665711699659356e-06, "loss": 0.4311, "step": 4426 }, { "epoch": 1.8645233749824512, "grad_norm": 0.3996296226978302, "learning_rate": 3.764196140077584e-06, "loss": 0.4235, "step": 4427 }, { "epoch": 1.864944545837428, "grad_norm": 0.438073992729187, "learning_rate": 3.761821407129258e-06, "loss": 0.4582, "step": 4428 }, { "epoch": 1.865365716692405, "grad_norm": 0.4063250422477722, "learning_rate": 3.7594469716915594e-06, "loss": 0.4278, "step": 4429 }, { "epoch": 1.8657868875473818, "grad_norm": 0.47510412335395813, "learning_rate": 3.7570728343350205e-06, "loss": 0.4638, "step": 4430 }, { "epoch": 1.8662080584023586, "grad_norm": 0.4627365171909332, "learning_rate": 3.7546989956301015e-06, "loss": 0.4318, "step": 4431 }, { "epoch": 1.8666292292573354, "grad_norm": 0.42500704526901245, "learning_rate": 3.752325456147189e-06, "loss": 0.3988, "step": 4432 }, { "epoch": 1.8670504001123123, "grad_norm": 0.43525004386901855, "learning_rate": 3.7499522164566e-06, "loss": 0.4766, "step": 4433 }, { "epoch": 1.867471570967289, "grad_norm": 0.4307997524738312, "learning_rate": 3.747579277128578e-06, "loss": 0.4216, "step": 4434 }, { "epoch": 1.867892741822266, "grad_norm": 0.4636372923851013, "learning_rate": 3.745206638733294e-06, "loss": 0.4835, "step": 4435 }, { "epoch": 1.8683139126772428, "grad_norm": 0.42234987020492554, "learning_rate": 3.742834301840853e-06, "loss": 0.4713, "step": 4436 }, { "epoch": 1.8687350835322196, "grad_norm": 0.4274287521839142, "learning_rate": 3.7404622670212754e-06, "loss": 0.4269, "step": 4437 }, { "epoch": 1.8691562543871965, "grad_norm": 0.3855040967464447, "learning_rate": 3.738090534844519e-06, "loss": 0.3936, "step": 4438 }, { "epoch": 1.8695774252421733, "grad_norm": 0.4332999289035797, "learning_rate": 3.735719105880464e-06, "loss": 0.4694, "step": 4439 }, { "epoch": 1.8699985960971501, "grad_norm": 0.40083563327789307, "learning_rate": 3.7333479806989215e-06, "loss": 0.4043, "step": 4440 }, { "epoch": 1.8704197669521267, "grad_norm": 0.4261399209499359, "learning_rate": 3.7309771598696286e-06, "loss": 0.4649, "step": 4441 }, { "epoch": 1.8708409378071038, "grad_norm": 0.4143516421318054, "learning_rate": 3.7286066439622432e-06, "loss": 0.4121, "step": 4442 }, { "epoch": 1.8712621086620804, "grad_norm": 0.4082401394844055, "learning_rate": 3.726236433546359e-06, "loss": 0.471, "step": 4443 }, { "epoch": 1.8716832795170575, "grad_norm": 0.4320613443851471, "learning_rate": 3.723866529191491e-06, "loss": 0.471, "step": 4444 }, { "epoch": 1.872104450372034, "grad_norm": 0.4623996317386627, "learning_rate": 3.7214969314670847e-06, "loss": 0.4144, "step": 4445 }, { "epoch": 1.8725256212270112, "grad_norm": 0.3777497410774231, "learning_rate": 3.7191276409425024e-06, "loss": 0.3908, "step": 4446 }, { "epoch": 1.8729467920819878, "grad_norm": 0.434379518032074, "learning_rate": 3.716758658187045e-06, "loss": 0.4275, "step": 4447 }, { "epoch": 1.8733679629369648, "grad_norm": 0.41537711024284363, "learning_rate": 3.7143899837699315e-06, "loss": 0.4441, "step": 4448 }, { "epoch": 1.8737891337919415, "grad_norm": 0.411554217338562, "learning_rate": 3.7120216182603102e-06, "loss": 0.4766, "step": 4449 }, { "epoch": 1.8742103046469185, "grad_norm": 0.40219971537590027, "learning_rate": 3.7096535622272544e-06, "loss": 0.4401, "step": 4450 }, { "epoch": 1.8746314755018951, "grad_norm": 0.39237093925476074, "learning_rate": 3.7072858162397606e-06, "loss": 0.4287, "step": 4451 }, { "epoch": 1.8750526463568722, "grad_norm": 0.3964400589466095, "learning_rate": 3.704918380866753e-06, "loss": 0.4301, "step": 4452 }, { "epoch": 1.8754738172118488, "grad_norm": 0.40915101766586304, "learning_rate": 3.702551256677083e-06, "loss": 0.441, "step": 4453 }, { "epoch": 1.8758949880668259, "grad_norm": 0.41165950894355774, "learning_rate": 3.700184444239524e-06, "loss": 0.4267, "step": 4454 }, { "epoch": 1.8763161589218025, "grad_norm": 0.3908890187740326, "learning_rate": 3.697817944122777e-06, "loss": 0.4306, "step": 4455 }, { "epoch": 1.8767373297767795, "grad_norm": 0.3872142732143402, "learning_rate": 3.695451756895465e-06, "loss": 0.4341, "step": 4456 }, { "epoch": 1.8771585006317562, "grad_norm": 0.3722248375415802, "learning_rate": 3.693085883126137e-06, "loss": 0.3984, "step": 4457 }, { "epoch": 1.8775796714867332, "grad_norm": 0.4250144958496094, "learning_rate": 3.69072032338327e-06, "loss": 0.4453, "step": 4458 }, { "epoch": 1.8780008423417098, "grad_norm": 0.45779287815093994, "learning_rate": 3.6883550782352596e-06, "loss": 0.4634, "step": 4459 }, { "epoch": 1.878422013196687, "grad_norm": 0.41411685943603516, "learning_rate": 3.6859901482504327e-06, "loss": 0.3892, "step": 4460 }, { "epoch": 1.8788431840516635, "grad_norm": 0.4016276001930237, "learning_rate": 3.6836255339970335e-06, "loss": 0.4667, "step": 4461 }, { "epoch": 1.8792643549066406, "grad_norm": 0.4208619296550751, "learning_rate": 3.681261236043233e-06, "loss": 0.4802, "step": 4462 }, { "epoch": 1.8796855257616172, "grad_norm": 0.3804731070995331, "learning_rate": 3.67889725495713e-06, "loss": 0.3733, "step": 4463 }, { "epoch": 1.8801066966165942, "grad_norm": 0.4084986448287964, "learning_rate": 3.6765335913067414e-06, "loss": 0.4429, "step": 4464 }, { "epoch": 1.8805278674715709, "grad_norm": 0.41049718856811523, "learning_rate": 3.674170245660013e-06, "loss": 0.4484, "step": 4465 }, { "epoch": 1.880949038326548, "grad_norm": 0.44029945135116577, "learning_rate": 3.6718072185848088e-06, "loss": 0.4819, "step": 4466 }, { "epoch": 1.8813702091815245, "grad_norm": 0.4144537150859833, "learning_rate": 3.6694445106489194e-06, "loss": 0.4328, "step": 4467 }, { "epoch": 1.8817913800365016, "grad_norm": 0.4198208749294281, "learning_rate": 3.667082122420059e-06, "loss": 0.4083, "step": 4468 }, { "epoch": 1.8822125508914782, "grad_norm": 0.39334604144096375, "learning_rate": 3.664720054465865e-06, "loss": 0.4093, "step": 4469 }, { "epoch": 1.8826337217464553, "grad_norm": 0.4631578326225281, "learning_rate": 3.662358307353897e-06, "loss": 0.4907, "step": 4470 }, { "epoch": 1.8830548926014319, "grad_norm": 0.4057151973247528, "learning_rate": 3.659996881651636e-06, "loss": 0.4007, "step": 4471 }, { "epoch": 1.883476063456409, "grad_norm": 0.40374556183815, "learning_rate": 3.657635777926488e-06, "loss": 0.3913, "step": 4472 }, { "epoch": 1.8838972343113856, "grad_norm": 0.5261768102645874, "learning_rate": 3.6552749967457823e-06, "loss": 0.4727, "step": 4473 }, { "epoch": 1.8843184051663626, "grad_norm": 0.4072650372982025, "learning_rate": 3.6529145386767677e-06, "loss": 0.405, "step": 4474 }, { "epoch": 1.8847395760213392, "grad_norm": 0.422852486371994, "learning_rate": 3.65055440428662e-06, "loss": 0.4482, "step": 4475 }, { "epoch": 1.8851607468763163, "grad_norm": 0.4719673991203308, "learning_rate": 3.648194594142431e-06, "loss": 0.4656, "step": 4476 }, { "epoch": 1.885581917731293, "grad_norm": 0.4420471787452698, "learning_rate": 3.645835108811219e-06, "loss": 0.5112, "step": 4477 }, { "epoch": 1.88600308858627, "grad_norm": 0.39400815963745117, "learning_rate": 3.6434759488599226e-06, "loss": 0.4402, "step": 4478 }, { "epoch": 1.8864242594412466, "grad_norm": 0.40535426139831543, "learning_rate": 3.641117114855405e-06, "loss": 0.4303, "step": 4479 }, { "epoch": 1.8868454302962236, "grad_norm": 0.3868614733219147, "learning_rate": 3.638758607364449e-06, "loss": 0.4077, "step": 4480 }, { "epoch": 1.8872666011512003, "grad_norm": 0.4071332514286041, "learning_rate": 3.6364004269537562e-06, "loss": 0.4095, "step": 4481 }, { "epoch": 1.8876877720061773, "grad_norm": 0.4289327561855316, "learning_rate": 3.634042574189952e-06, "loss": 0.4386, "step": 4482 }, { "epoch": 1.888108942861154, "grad_norm": 0.48824408650398254, "learning_rate": 3.6316850496395863e-06, "loss": 0.5078, "step": 4483 }, { "epoch": 1.888530113716131, "grad_norm": 0.4628366231918335, "learning_rate": 3.6293278538691247e-06, "loss": 0.4571, "step": 4484 }, { "epoch": 1.8889512845711076, "grad_norm": 0.435173362493515, "learning_rate": 3.6269709874449588e-06, "loss": 0.4267, "step": 4485 }, { "epoch": 1.8893724554260847, "grad_norm": 0.4080977141857147, "learning_rate": 3.6246144509333957e-06, "loss": 0.4143, "step": 4486 }, { "epoch": 1.8897936262810613, "grad_norm": 0.4273560345172882, "learning_rate": 3.6222582449006673e-06, "loss": 0.4101, "step": 4487 }, { "epoch": 1.8902147971360383, "grad_norm": 0.39741507172584534, "learning_rate": 3.6199023699129244e-06, "loss": 0.4144, "step": 4488 }, { "epoch": 1.890635967991015, "grad_norm": 0.4335480332374573, "learning_rate": 3.6175468265362395e-06, "loss": 0.45, "step": 4489 }, { "epoch": 1.8910571388459918, "grad_norm": 0.39418941736221313, "learning_rate": 3.6151916153366053e-06, "loss": 0.4188, "step": 4490 }, { "epoch": 1.8914783097009686, "grad_norm": 0.3590278923511505, "learning_rate": 3.612836736879931e-06, "loss": 0.4355, "step": 4491 }, { "epoch": 1.8918994805559455, "grad_norm": 0.3938893675804138, "learning_rate": 3.6104821917320515e-06, "loss": 0.4173, "step": 4492 }, { "epoch": 1.8923206514109223, "grad_norm": 0.4367144703865051, "learning_rate": 3.6081279804587176e-06, "loss": 0.4468, "step": 4493 }, { "epoch": 1.8927418222658992, "grad_norm": 0.4588538110256195, "learning_rate": 3.605774103625601e-06, "loss": 0.4629, "step": 4494 }, { "epoch": 1.893162993120876, "grad_norm": 0.4331955909729004, "learning_rate": 3.6034205617982977e-06, "loss": 0.4238, "step": 4495 }, { "epoch": 1.8935841639758528, "grad_norm": 0.5559256672859192, "learning_rate": 3.601067355542312e-06, "loss": 0.4361, "step": 4496 }, { "epoch": 1.8940053348308297, "grad_norm": 0.4580596387386322, "learning_rate": 3.598714485423078e-06, "loss": 0.4851, "step": 4497 }, { "epoch": 1.8944265056858065, "grad_norm": 0.47057604789733887, "learning_rate": 3.5963619520059432e-06, "loss": 0.4544, "step": 4498 }, { "epoch": 1.8948476765407833, "grad_norm": 0.45449909567832947, "learning_rate": 3.5940097558561798e-06, "loss": 0.4333, "step": 4499 }, { "epoch": 1.8952688473957602, "grad_norm": 0.4531959295272827, "learning_rate": 3.591657897538976e-06, "loss": 0.4543, "step": 4500 }, { "epoch": 1.895690018250737, "grad_norm": 0.39528700709342957, "learning_rate": 3.5893063776194325e-06, "loss": 0.4206, "step": 4501 }, { "epoch": 1.8961111891057139, "grad_norm": 0.42289191484451294, "learning_rate": 3.5869551966625795e-06, "loss": 0.4691, "step": 4502 }, { "epoch": 1.8965323599606907, "grad_norm": 0.45665332674980164, "learning_rate": 3.5846043552333586e-06, "loss": 0.4637, "step": 4503 }, { "epoch": 1.8969535308156675, "grad_norm": 0.38067835569381714, "learning_rate": 3.5822538538966333e-06, "loss": 0.4016, "step": 4504 }, { "epoch": 1.8973747016706444, "grad_norm": 0.44357073307037354, "learning_rate": 3.5799036932171847e-06, "loss": 0.4421, "step": 4505 }, { "epoch": 1.8977958725256212, "grad_norm": 0.4375670850276947, "learning_rate": 3.57755387375971e-06, "loss": 0.4475, "step": 4506 }, { "epoch": 1.898217043380598, "grad_norm": 0.39775562286376953, "learning_rate": 3.575204396088826e-06, "loss": 0.4047, "step": 4507 }, { "epoch": 1.8986382142355749, "grad_norm": 0.410889208316803, "learning_rate": 3.572855260769067e-06, "loss": 0.4185, "step": 4508 }, { "epoch": 1.8990593850905517, "grad_norm": 0.4556342363357544, "learning_rate": 3.570506468364886e-06, "loss": 0.4624, "step": 4509 }, { "epoch": 1.8994805559455286, "grad_norm": 0.44563615322113037, "learning_rate": 3.568158019440654e-06, "loss": 0.4194, "step": 4510 }, { "epoch": 1.8999017268005054, "grad_norm": 0.4314019978046417, "learning_rate": 3.565809914560656e-06, "loss": 0.4478, "step": 4511 }, { "epoch": 1.9003228976554822, "grad_norm": 0.4470682740211487, "learning_rate": 3.563462154289098e-06, "loss": 0.4323, "step": 4512 }, { "epoch": 1.900744068510459, "grad_norm": 0.41353940963745117, "learning_rate": 3.5611147391901008e-06, "loss": 0.4302, "step": 4513 }, { "epoch": 1.901165239365436, "grad_norm": 0.4293280839920044, "learning_rate": 3.558767669827703e-06, "loss": 0.4207, "step": 4514 }, { "epoch": 1.9015864102204127, "grad_norm": 0.48342907428741455, "learning_rate": 3.556420946765865e-06, "loss": 0.4919, "step": 4515 }, { "epoch": 1.9020075810753896, "grad_norm": 0.4155536890029907, "learning_rate": 3.5540745705684543e-06, "loss": 0.4066, "step": 4516 }, { "epoch": 1.9024287519303664, "grad_norm": 0.4728204011917114, "learning_rate": 3.551728541799262e-06, "loss": 0.475, "step": 4517 }, { "epoch": 1.9028499227853433, "grad_norm": 0.399263858795166, "learning_rate": 3.5493828610219926e-06, "loss": 0.4097, "step": 4518 }, { "epoch": 1.90327109364032, "grad_norm": 0.4481593072414398, "learning_rate": 3.5470375288002707e-06, "loss": 0.4531, "step": 4519 }, { "epoch": 1.903692264495297, "grad_norm": 0.4532383680343628, "learning_rate": 3.5446925456976355e-06, "loss": 0.4454, "step": 4520 }, { "epoch": 1.9041134353502738, "grad_norm": 0.43291258811950684, "learning_rate": 3.542347912277537e-06, "loss": 0.4455, "step": 4521 }, { "epoch": 1.9045346062052506, "grad_norm": 0.40951329469680786, "learning_rate": 3.540003629103349e-06, "loss": 0.4075, "step": 4522 }, { "epoch": 1.9049557770602275, "grad_norm": 0.4255315065383911, "learning_rate": 3.537659696738358e-06, "loss": 0.4399, "step": 4523 }, { "epoch": 1.9053769479152043, "grad_norm": 0.42629721760749817, "learning_rate": 3.535316115745765e-06, "loss": 0.4629, "step": 4524 }, { "epoch": 1.9057981187701811, "grad_norm": 0.3991374671459198, "learning_rate": 3.532972886688689e-06, "loss": 0.3977, "step": 4525 }, { "epoch": 1.906219289625158, "grad_norm": 0.4682493805885315, "learning_rate": 3.5306300101301622e-06, "loss": 0.4738, "step": 4526 }, { "epoch": 1.9066404604801348, "grad_norm": 0.42055898904800415, "learning_rate": 3.528287486633133e-06, "loss": 0.3998, "step": 4527 }, { "epoch": 1.9070616313351116, "grad_norm": 0.4040989279747009, "learning_rate": 3.525945316760464e-06, "loss": 0.4313, "step": 4528 }, { "epoch": 1.9074828021900885, "grad_norm": 0.45772114396095276, "learning_rate": 3.523603501074936e-06, "loss": 0.4502, "step": 4529 }, { "epoch": 1.9079039730450653, "grad_norm": 0.4946340322494507, "learning_rate": 3.5212620401392416e-06, "loss": 0.5062, "step": 4530 }, { "epoch": 1.9083251439000422, "grad_norm": 0.4840538203716278, "learning_rate": 3.5189209345159886e-06, "loss": 0.4573, "step": 4531 }, { "epoch": 1.908746314755019, "grad_norm": 0.42338883876800537, "learning_rate": 3.5165801847676995e-06, "loss": 0.4026, "step": 4532 }, { "epoch": 1.9091674856099958, "grad_norm": 0.4833667576313019, "learning_rate": 3.5142397914568127e-06, "loss": 0.4985, "step": 4533 }, { "epoch": 1.9095886564649727, "grad_norm": 0.4646002948284149, "learning_rate": 3.5118997551456785e-06, "loss": 0.4618, "step": 4534 }, { "epoch": 1.9100098273199495, "grad_norm": 0.4206227958202362, "learning_rate": 3.5095600763965675e-06, "loss": 0.4262, "step": 4535 }, { "epoch": 1.9104309981749263, "grad_norm": 0.4179682731628418, "learning_rate": 3.507220755771653e-06, "loss": 0.4363, "step": 4536 }, { "epoch": 1.9108521690299032, "grad_norm": 0.4051799178123474, "learning_rate": 3.504881793833032e-06, "loss": 0.3947, "step": 4537 }, { "epoch": 1.91127333988488, "grad_norm": 0.45643964409828186, "learning_rate": 3.502543191142713e-06, "loss": 0.4478, "step": 4538 }, { "epoch": 1.9116945107398569, "grad_norm": 0.42764848470687866, "learning_rate": 3.5002049482626176e-06, "loss": 0.4423, "step": 4539 }, { "epoch": 1.9121156815948335, "grad_norm": 0.4257296025753021, "learning_rate": 3.497867065754582e-06, "loss": 0.4397, "step": 4540 }, { "epoch": 1.9125368524498105, "grad_norm": 0.4457004964351654, "learning_rate": 3.49552954418035e-06, "loss": 0.4167, "step": 4541 }, { "epoch": 1.9129580233047871, "grad_norm": 0.4590655267238617, "learning_rate": 3.493192384101587e-06, "loss": 0.4359, "step": 4542 }, { "epoch": 1.9133791941597642, "grad_norm": 0.44399595260620117, "learning_rate": 3.4908555860798667e-06, "loss": 0.4412, "step": 4543 }, { "epoch": 1.9138003650147408, "grad_norm": 0.4079795777797699, "learning_rate": 3.4885191506766776e-06, "loss": 0.4208, "step": 4544 }, { "epoch": 1.9142215358697179, "grad_norm": 0.39102089405059814, "learning_rate": 3.4861830784534214e-06, "loss": 0.3937, "step": 4545 }, { "epoch": 1.9146427067246945, "grad_norm": 0.40294209122657776, "learning_rate": 3.483847369971408e-06, "loss": 0.4095, "step": 4546 }, { "epoch": 1.9150638775796716, "grad_norm": 0.4101625680923462, "learning_rate": 3.4815120257918665e-06, "loss": 0.4412, "step": 4547 }, { "epoch": 1.9154850484346482, "grad_norm": 0.4149177372455597, "learning_rate": 3.479177046475935e-06, "loss": 0.4245, "step": 4548 }, { "epoch": 1.9159062192896252, "grad_norm": 0.44778406620025635, "learning_rate": 3.476842432584665e-06, "loss": 0.4239, "step": 4549 }, { "epoch": 1.9163273901446018, "grad_norm": 0.41085729002952576, "learning_rate": 3.4745081846790176e-06, "loss": 0.4113, "step": 4550 }, { "epoch": 1.916748560999579, "grad_norm": 0.411345899105072, "learning_rate": 3.4721743033198685e-06, "loss": 0.4221, "step": 4551 }, { "epoch": 1.9171697318545555, "grad_norm": 0.38741761445999146, "learning_rate": 3.469840789068005e-06, "loss": 0.4298, "step": 4552 }, { "epoch": 1.9175909027095326, "grad_norm": 0.46924155950546265, "learning_rate": 3.4675076424841257e-06, "loss": 0.512, "step": 4553 }, { "epoch": 1.9180120735645092, "grad_norm": 0.4102037847042084, "learning_rate": 3.4651748641288436e-06, "loss": 0.4651, "step": 4554 }, { "epoch": 1.9184332444194863, "grad_norm": 0.3614097833633423, "learning_rate": 3.462842454562677e-06, "loss": 0.3618, "step": 4555 }, { "epoch": 1.9188544152744629, "grad_norm": 0.4609030485153198, "learning_rate": 3.4605104143460604e-06, "loss": 0.4615, "step": 4556 }, { "epoch": 1.91927558612944, "grad_norm": 0.42485111951828003, "learning_rate": 3.458178744039339e-06, "loss": 0.4353, "step": 4557 }, { "epoch": 1.9196967569844166, "grad_norm": 0.418906569480896, "learning_rate": 3.455847444202768e-06, "loss": 0.4353, "step": 4558 }, { "epoch": 1.9201179278393936, "grad_norm": 0.44384273886680603, "learning_rate": 3.4535165153965177e-06, "loss": 0.476, "step": 4559 }, { "epoch": 1.9205390986943702, "grad_norm": 0.4702363908290863, "learning_rate": 3.4511859581806596e-06, "loss": 0.44, "step": 4560 }, { "epoch": 1.9209602695493473, "grad_norm": 0.4128655791282654, "learning_rate": 3.448855773115185e-06, "loss": 0.4283, "step": 4561 }, { "epoch": 1.921381440404324, "grad_norm": 0.4205722510814667, "learning_rate": 3.4465259607599936e-06, "loss": 0.4253, "step": 4562 }, { "epoch": 1.921802611259301, "grad_norm": 0.42836371064186096, "learning_rate": 3.4441965216748942e-06, "loss": 0.4636, "step": 4563 }, { "epoch": 1.9222237821142776, "grad_norm": 0.41345950961112976, "learning_rate": 3.4418674564196063e-06, "loss": 0.4721, "step": 4564 }, { "epoch": 1.9226449529692546, "grad_norm": 0.4175971746444702, "learning_rate": 3.4395387655537594e-06, "loss": 0.4083, "step": 4565 }, { "epoch": 1.9230661238242313, "grad_norm": 0.4429607689380646, "learning_rate": 3.4372104496368934e-06, "loss": 0.4356, "step": 4566 }, { "epoch": 1.9234872946792083, "grad_norm": 0.42750340700149536, "learning_rate": 3.4348825092284587e-06, "loss": 0.4478, "step": 4567 }, { "epoch": 1.923908465534185, "grad_norm": 0.3661802411079407, "learning_rate": 3.4325549448878138e-06, "loss": 0.3783, "step": 4568 }, { "epoch": 1.924329636389162, "grad_norm": 0.4655521810054779, "learning_rate": 3.430227757174229e-06, "loss": 0.5482, "step": 4569 }, { "epoch": 1.9247508072441386, "grad_norm": 0.3800435960292816, "learning_rate": 3.4279009466468825e-06, "loss": 0.3834, "step": 4570 }, { "epoch": 1.9251719780991157, "grad_norm": 0.3768704831600189, "learning_rate": 3.425574513864862e-06, "loss": 0.4069, "step": 4571 }, { "epoch": 1.9255931489540923, "grad_norm": 0.3850620985031128, "learning_rate": 3.423248459387165e-06, "loss": 0.4157, "step": 4572 }, { "epoch": 1.9260143198090693, "grad_norm": 0.42200934886932373, "learning_rate": 3.4209227837726972e-06, "loss": 0.4246, "step": 4573 }, { "epoch": 1.926435490664046, "grad_norm": 0.4471469819545746, "learning_rate": 3.4185974875802774e-06, "loss": 0.447, "step": 4574 }, { "epoch": 1.926856661519023, "grad_norm": 0.3930429518222809, "learning_rate": 3.4162725713686257e-06, "loss": 0.4557, "step": 4575 }, { "epoch": 1.9272778323739996, "grad_norm": 0.41288119554519653, "learning_rate": 3.413948035696376e-06, "loss": 0.4151, "step": 4576 }, { "epoch": 1.9276990032289767, "grad_norm": 0.4385302662849426, "learning_rate": 3.411623881122069e-06, "loss": 0.4521, "step": 4577 }, { "epoch": 1.9281201740839533, "grad_norm": 0.46433940529823303, "learning_rate": 3.409300108204157e-06, "loss": 0.4866, "step": 4578 }, { "epoch": 1.9285413449389304, "grad_norm": 0.4595066010951996, "learning_rate": 3.4069767175009995e-06, "loss": 0.4712, "step": 4579 }, { "epoch": 1.928962515793907, "grad_norm": 0.48652058839797974, "learning_rate": 3.4046537095708565e-06, "loss": 0.5135, "step": 4580 }, { "epoch": 1.929383686648884, "grad_norm": 0.3761994242668152, "learning_rate": 3.4023310849719073e-06, "loss": 0.3804, "step": 4581 }, { "epoch": 1.9298048575038607, "grad_norm": 0.452248752117157, "learning_rate": 3.4000088442622337e-06, "loss": 0.4625, "step": 4582 }, { "epoch": 1.9302260283588377, "grad_norm": 0.4963609278202057, "learning_rate": 3.397686987999824e-06, "loss": 0.4748, "step": 4583 }, { "epoch": 1.9306471992138143, "grad_norm": 0.42555755376815796, "learning_rate": 3.395365516742578e-06, "loss": 0.4319, "step": 4584 }, { "epoch": 1.9310683700687914, "grad_norm": 0.4303411543369293, "learning_rate": 3.393044431048299e-06, "loss": 0.5081, "step": 4585 }, { "epoch": 1.931489540923768, "grad_norm": 0.3729874789714813, "learning_rate": 3.3907237314747003e-06, "loss": 0.4053, "step": 4586 }, { "epoch": 1.931910711778745, "grad_norm": 0.41583263874053955, "learning_rate": 3.3884034185794014e-06, "loss": 0.4729, "step": 4587 }, { "epoch": 1.9323318826337217, "grad_norm": 0.414997935295105, "learning_rate": 3.3860834929199294e-06, "loss": 0.4659, "step": 4588 }, { "epoch": 1.9327530534886987, "grad_norm": 0.44356203079223633, "learning_rate": 3.3837639550537183e-06, "loss": 0.4342, "step": 4589 }, { "epoch": 1.9331742243436754, "grad_norm": 0.4391612112522125, "learning_rate": 3.381444805538108e-06, "loss": 0.4248, "step": 4590 }, { "epoch": 1.9335953951986522, "grad_norm": 0.4317402243614197, "learning_rate": 3.3791260449303454e-06, "loss": 0.4212, "step": 4591 }, { "epoch": 1.934016566053629, "grad_norm": 0.4160108268260956, "learning_rate": 3.376807673787585e-06, "loss": 0.4321, "step": 4592 }, { "epoch": 1.9344377369086059, "grad_norm": 0.43092724680900574, "learning_rate": 3.374489692666886e-06, "loss": 0.4657, "step": 4593 }, { "epoch": 1.9348589077635827, "grad_norm": 0.4106234014034271, "learning_rate": 3.3721721021252185e-06, "loss": 0.4307, "step": 4594 }, { "epoch": 1.9352800786185596, "grad_norm": 0.4695303738117218, "learning_rate": 3.3698549027194505e-06, "loss": 0.4374, "step": 4595 }, { "epoch": 1.9357012494735364, "grad_norm": 0.44954612851142883, "learning_rate": 3.367538095006362e-06, "loss": 0.4326, "step": 4596 }, { "epoch": 1.9361224203285132, "grad_norm": 0.4572383761405945, "learning_rate": 3.365221679542639e-06, "loss": 0.4762, "step": 4597 }, { "epoch": 1.93654359118349, "grad_norm": 0.4255531430244446, "learning_rate": 3.362905656884871e-06, "loss": 0.4119, "step": 4598 }, { "epoch": 1.936964762038467, "grad_norm": 0.42045027017593384, "learning_rate": 3.3605900275895565e-06, "loss": 0.4756, "step": 4599 }, { "epoch": 1.9373859328934437, "grad_norm": 0.4021379351615906, "learning_rate": 3.3582747922130908e-06, "loss": 0.4522, "step": 4600 }, { "epoch": 1.9378071037484206, "grad_norm": 0.42907124757766724, "learning_rate": 3.355959951311786e-06, "loss": 0.4046, "step": 4601 }, { "epoch": 1.9382282746033974, "grad_norm": 0.39084360003471375, "learning_rate": 3.353645505441852e-06, "loss": 0.4364, "step": 4602 }, { "epoch": 1.9386494454583743, "grad_norm": 0.476650595664978, "learning_rate": 3.351331455159407e-06, "loss": 0.4389, "step": 4603 }, { "epoch": 1.939070616313351, "grad_norm": 0.4416521191596985, "learning_rate": 3.349017801020472e-06, "loss": 0.429, "step": 4604 }, { "epoch": 1.939491787168328, "grad_norm": 0.4339820146560669, "learning_rate": 3.3467045435809737e-06, "loss": 0.4353, "step": 4605 }, { "epoch": 1.9399129580233048, "grad_norm": 0.40957117080688477, "learning_rate": 3.344391683396744e-06, "loss": 0.4617, "step": 4606 }, { "epoch": 1.9403341288782816, "grad_norm": 0.33711695671081543, "learning_rate": 3.3420792210235188e-06, "loss": 0.3504, "step": 4607 }, { "epoch": 1.9407552997332584, "grad_norm": 0.41598764061927795, "learning_rate": 3.339767157016939e-06, "loss": 0.4924, "step": 4608 }, { "epoch": 1.9411764705882353, "grad_norm": 0.42025837302207947, "learning_rate": 3.3374554919325496e-06, "loss": 0.4147, "step": 4609 }, { "epoch": 1.9415976414432121, "grad_norm": 0.431022047996521, "learning_rate": 3.3351442263257983e-06, "loss": 0.4913, "step": 4610 }, { "epoch": 1.942018812298189, "grad_norm": 0.37487539649009705, "learning_rate": 3.3328333607520386e-06, "loss": 0.3722, "step": 4611 }, { "epoch": 1.9424399831531658, "grad_norm": 0.43708348274230957, "learning_rate": 3.330522895766527e-06, "loss": 0.4497, "step": 4612 }, { "epoch": 1.9428611540081426, "grad_norm": 0.5195362567901611, "learning_rate": 3.328212831924424e-06, "loss": 0.5106, "step": 4613 }, { "epoch": 1.9432823248631195, "grad_norm": 0.39203307032585144, "learning_rate": 3.325903169780797e-06, "loss": 0.4439, "step": 4614 }, { "epoch": 1.9437034957180963, "grad_norm": 0.4455963671207428, "learning_rate": 3.323593909890609e-06, "loss": 0.4503, "step": 4615 }, { "epoch": 1.9441246665730731, "grad_norm": 0.4256289005279541, "learning_rate": 3.321285052808731e-06, "loss": 0.4042, "step": 4616 }, { "epoch": 1.94454583742805, "grad_norm": 0.4527982175350189, "learning_rate": 3.3189765990899403e-06, "loss": 0.4753, "step": 4617 }, { "epoch": 1.9449670082830268, "grad_norm": 0.41956639289855957, "learning_rate": 3.316668549288913e-06, "loss": 0.4387, "step": 4618 }, { "epoch": 1.9453881791380037, "grad_norm": 0.4475945234298706, "learning_rate": 3.3143609039602297e-06, "loss": 0.4925, "step": 4619 }, { "epoch": 1.9458093499929805, "grad_norm": 0.3762500584125519, "learning_rate": 3.312053663658372e-06, "loss": 0.3722, "step": 4620 }, { "epoch": 1.9462305208479573, "grad_norm": 0.4657576084136963, "learning_rate": 3.3097468289377268e-06, "loss": 0.4436, "step": 4621 }, { "epoch": 1.9466516917029342, "grad_norm": 0.4648568332195282, "learning_rate": 3.307440400352581e-06, "loss": 0.4676, "step": 4622 }, { "epoch": 1.947072862557911, "grad_norm": 0.4107486307621002, "learning_rate": 3.305134378457127e-06, "loss": 0.4401, "step": 4623 }, { "epoch": 1.9474940334128878, "grad_norm": 0.42699098587036133, "learning_rate": 3.302828763805458e-06, "loss": 0.4215, "step": 4624 }, { "epoch": 1.9479152042678647, "grad_norm": 0.42642921209335327, "learning_rate": 3.300523556951567e-06, "loss": 0.4168, "step": 4625 }, { "epoch": 1.9483363751228415, "grad_norm": 0.4544716477394104, "learning_rate": 3.298218758449352e-06, "loss": 0.3895, "step": 4626 }, { "epoch": 1.9487575459778184, "grad_norm": 0.44009795784950256, "learning_rate": 3.295914368852612e-06, "loss": 0.4907, "step": 4627 }, { "epoch": 1.9491787168327952, "grad_norm": 0.43700408935546875, "learning_rate": 3.2936103887150484e-06, "loss": 0.4666, "step": 4628 }, { "epoch": 1.949599887687772, "grad_norm": 0.4084651470184326, "learning_rate": 3.2913068185902643e-06, "loss": 0.4091, "step": 4629 }, { "epoch": 1.9500210585427489, "grad_norm": 0.44505447149276733, "learning_rate": 3.2890036590317604e-06, "loss": 0.5125, "step": 4630 }, { "epoch": 1.9504422293977257, "grad_norm": 0.4108908176422119, "learning_rate": 3.2867009105929437e-06, "loss": 0.4438, "step": 4631 }, { "epoch": 1.9508634002527026, "grad_norm": 0.4829976260662079, "learning_rate": 3.28439857382712e-06, "loss": 0.472, "step": 4632 }, { "epoch": 1.9512845711076794, "grad_norm": 0.393932968378067, "learning_rate": 3.282096649287499e-06, "loss": 0.4255, "step": 4633 }, { "epoch": 1.9517057419626562, "grad_norm": 0.43538692593574524, "learning_rate": 3.279795137527189e-06, "loss": 0.4071, "step": 4634 }, { "epoch": 1.952126912817633, "grad_norm": 0.41817939281463623, "learning_rate": 3.277494039099196e-06, "loss": 0.4567, "step": 4635 }, { "epoch": 1.95254808367261, "grad_norm": 0.42848506569862366, "learning_rate": 3.2751933545564306e-06, "loss": 0.4275, "step": 4636 }, { "epoch": 1.9529692545275867, "grad_norm": 0.40475785732269287, "learning_rate": 3.2728930844517052e-06, "loss": 0.3842, "step": 4637 }, { "epoch": 1.9533904253825636, "grad_norm": 0.41966477036476135, "learning_rate": 3.2705932293377306e-06, "loss": 0.4491, "step": 4638 }, { "epoch": 1.9538115962375404, "grad_norm": 0.4367079436779022, "learning_rate": 3.268293789767118e-06, "loss": 0.4575, "step": 4639 }, { "epoch": 1.9542327670925173, "grad_norm": 0.45398762822151184, "learning_rate": 3.2659947662923767e-06, "loss": 0.3833, "step": 4640 }, { "epoch": 1.9546539379474939, "grad_norm": 0.4774326980113983, "learning_rate": 3.26369615946592e-06, "loss": 0.4917, "step": 4641 }, { "epoch": 1.955075108802471, "grad_norm": 0.4206695854663849, "learning_rate": 3.2613979698400576e-06, "loss": 0.458, "step": 4642 }, { "epoch": 1.9554962796574475, "grad_norm": 0.43801409006118774, "learning_rate": 3.259100197967002e-06, "loss": 0.4414, "step": 4643 }, { "epoch": 1.9559174505124246, "grad_norm": 0.4580310583114624, "learning_rate": 3.256802844398865e-06, "loss": 0.4852, "step": 4644 }, { "epoch": 1.9563386213674012, "grad_norm": 0.43526721000671387, "learning_rate": 3.254505909687653e-06, "loss": 0.4155, "step": 4645 }, { "epoch": 1.9567597922223783, "grad_norm": 0.442043274641037, "learning_rate": 3.252209394385278e-06, "loss": 0.4173, "step": 4646 }, { "epoch": 1.957180963077355, "grad_norm": 0.41764989495277405, "learning_rate": 3.2499132990435477e-06, "loss": 0.4393, "step": 4647 }, { "epoch": 1.957602133932332, "grad_norm": 0.41081082820892334, "learning_rate": 3.2476176242141693e-06, "loss": 0.4194, "step": 4648 }, { "epoch": 1.9580233047873086, "grad_norm": 0.433624804019928, "learning_rate": 3.2453223704487535e-06, "loss": 0.4384, "step": 4649 }, { "epoch": 1.9584444756422856, "grad_norm": 0.4319455325603485, "learning_rate": 3.2430275382988017e-06, "loss": 0.4264, "step": 4650 }, { "epoch": 1.9588656464972622, "grad_norm": 0.38850271701812744, "learning_rate": 3.2407331283157185e-06, "loss": 0.426, "step": 4651 }, { "epoch": 1.9592868173522393, "grad_norm": 0.4214298129081726, "learning_rate": 3.238439141050807e-06, "loss": 0.4308, "step": 4652 }, { "epoch": 1.959707988207216, "grad_norm": 0.48485317826271057, "learning_rate": 3.2361455770552717e-06, "loss": 0.4783, "step": 4653 }, { "epoch": 1.960129159062193, "grad_norm": 0.39150741696357727, "learning_rate": 3.233852436880208e-06, "loss": 0.3692, "step": 4654 }, { "epoch": 1.9605503299171696, "grad_norm": 0.41783010959625244, "learning_rate": 3.2315597210766146e-06, "loss": 0.4454, "step": 4655 }, { "epoch": 1.9609715007721467, "grad_norm": 0.45492029190063477, "learning_rate": 3.229267430195388e-06, "loss": 0.5145, "step": 4656 }, { "epoch": 1.9613926716271233, "grad_norm": 0.4448552131652832, "learning_rate": 3.226975564787322e-06, "loss": 0.3794, "step": 4657 }, { "epoch": 1.9618138424821003, "grad_norm": 0.4914184510707855, "learning_rate": 3.224684125403109e-06, "loss": 0.5321, "step": 4658 }, { "epoch": 1.962235013337077, "grad_norm": 0.3754063546657562, "learning_rate": 3.2223931125933346e-06, "loss": 0.3771, "step": 4659 }, { "epoch": 1.962656184192054, "grad_norm": 0.41448044776916504, "learning_rate": 3.220102526908488e-06, "loss": 0.4344, "step": 4660 }, { "epoch": 1.9630773550470306, "grad_norm": 0.4064524471759796, "learning_rate": 3.2178123688989526e-06, "loss": 0.4533, "step": 4661 }, { "epoch": 1.9634985259020077, "grad_norm": 0.39686745405197144, "learning_rate": 3.215522639115009e-06, "loss": 0.4337, "step": 4662 }, { "epoch": 1.9639196967569843, "grad_norm": 0.42924144864082336, "learning_rate": 3.2132333381068376e-06, "loss": 0.48, "step": 4663 }, { "epoch": 1.9643408676119614, "grad_norm": 0.4408440887928009, "learning_rate": 3.2109444664245102e-06, "loss": 0.471, "step": 4664 }, { "epoch": 1.964762038466938, "grad_norm": 0.42477530241012573, "learning_rate": 3.208656024618002e-06, "loss": 0.439, "step": 4665 }, { "epoch": 1.965183209321915, "grad_norm": 0.39455968141555786, "learning_rate": 3.20636801323718e-06, "loss": 0.3676, "step": 4666 }, { "epoch": 1.9656043801768917, "grad_norm": 0.45904070138931274, "learning_rate": 3.2040804328318093e-06, "loss": 0.535, "step": 4667 }, { "epoch": 1.9660255510318687, "grad_norm": 0.3960941433906555, "learning_rate": 3.201793283951554e-06, "loss": 0.4072, "step": 4668 }, { "epoch": 1.9664467218868453, "grad_norm": 0.4365546405315399, "learning_rate": 3.199506567145969e-06, "loss": 0.4366, "step": 4669 }, { "epoch": 1.9668678927418224, "grad_norm": 0.40137872099876404, "learning_rate": 3.1972202829645115e-06, "loss": 0.4479, "step": 4670 }, { "epoch": 1.967289063596799, "grad_norm": 0.4422924518585205, "learning_rate": 3.1949344319565295e-06, "loss": 0.4151, "step": 4671 }, { "epoch": 1.967710234451776, "grad_norm": 0.44204604625701904, "learning_rate": 3.1926490146712697e-06, "loss": 0.4303, "step": 4672 }, { "epoch": 1.9681314053067527, "grad_norm": 0.44869327545166016, "learning_rate": 3.1903640316578783e-06, "loss": 0.4566, "step": 4673 }, { "epoch": 1.9685525761617297, "grad_norm": 0.49947044253349304, "learning_rate": 3.1880794834653872e-06, "loss": 0.5118, "step": 4674 }, { "epoch": 1.9689737470167064, "grad_norm": 0.4328751862049103, "learning_rate": 3.1857953706427303e-06, "loss": 0.4286, "step": 4675 }, { "epoch": 1.9693949178716834, "grad_norm": 0.4318939745426178, "learning_rate": 3.18351169373874e-06, "loss": 0.4843, "step": 4676 }, { "epoch": 1.96981608872666, "grad_norm": 0.39034798741340637, "learning_rate": 3.181228453302137e-06, "loss": 0.4169, "step": 4677 }, { "epoch": 1.970237259581637, "grad_norm": 0.39417365193367004, "learning_rate": 3.178945649881543e-06, "loss": 0.4156, "step": 4678 }, { "epoch": 1.9706584304366137, "grad_norm": 0.40653419494628906, "learning_rate": 3.1766632840254684e-06, "loss": 0.4092, "step": 4679 }, { "epoch": 1.9710796012915908, "grad_norm": 0.4233283996582031, "learning_rate": 3.1743813562823256e-06, "loss": 0.431, "step": 4680 }, { "epoch": 1.9715007721465674, "grad_norm": 0.44803863763809204, "learning_rate": 3.1720998672004153e-06, "loss": 0.4562, "step": 4681 }, { "epoch": 1.9719219430015444, "grad_norm": 0.4133841395378113, "learning_rate": 3.169818817327937e-06, "loss": 0.4373, "step": 4682 }, { "epoch": 1.972343113856521, "grad_norm": 0.48091474175453186, "learning_rate": 3.1675382072129847e-06, "loss": 0.4916, "step": 4683 }, { "epoch": 1.9727642847114981, "grad_norm": 0.4124965965747833, "learning_rate": 3.1652580374035423e-06, "loss": 0.4584, "step": 4684 }, { "epoch": 1.9731854555664747, "grad_norm": 0.37427932024002075, "learning_rate": 3.1629783084474936e-06, "loss": 0.4129, "step": 4685 }, { "epoch": 1.9736066264214518, "grad_norm": 0.41840195655822754, "learning_rate": 3.1606990208926125e-06, "loss": 0.4391, "step": 4686 }, { "epoch": 1.9740277972764284, "grad_norm": 0.441527783870697, "learning_rate": 3.1584201752865683e-06, "loss": 0.4473, "step": 4687 }, { "epoch": 1.9744489681314055, "grad_norm": 0.457895427942276, "learning_rate": 3.156141772176925e-06, "loss": 0.4494, "step": 4688 }, { "epoch": 1.974870138986382, "grad_norm": 0.4227060079574585, "learning_rate": 3.153863812111138e-06, "loss": 0.405, "step": 4689 }, { "epoch": 1.9752913098413591, "grad_norm": 0.4346819519996643, "learning_rate": 3.1515862956365584e-06, "loss": 0.4991, "step": 4690 }, { "epoch": 1.9757124806963358, "grad_norm": 0.36654558777809143, "learning_rate": 3.149309223300428e-06, "loss": 0.383, "step": 4691 }, { "epoch": 1.9761336515513126, "grad_norm": 0.42765530943870544, "learning_rate": 3.1470325956498863e-06, "loss": 0.426, "step": 4692 }, { "epoch": 1.9765548224062894, "grad_norm": 0.419332355260849, "learning_rate": 3.1447564132319648e-06, "loss": 0.4214, "step": 4693 }, { "epoch": 1.9769759932612663, "grad_norm": 0.4655868113040924, "learning_rate": 3.142480676593582e-06, "loss": 0.4175, "step": 4694 }, { "epoch": 1.9773971641162431, "grad_norm": 0.42472943663597107, "learning_rate": 3.140205386281554e-06, "loss": 0.3992, "step": 4695 }, { "epoch": 1.97781833497122, "grad_norm": 0.4797898530960083, "learning_rate": 3.1379305428425933e-06, "loss": 0.5278, "step": 4696 }, { "epoch": 1.9782395058261968, "grad_norm": 0.39517420530319214, "learning_rate": 3.1356561468232994e-06, "loss": 0.4369, "step": 4697 }, { "epoch": 1.9786606766811736, "grad_norm": 0.3967428505420685, "learning_rate": 3.133382198770167e-06, "loss": 0.4405, "step": 4698 }, { "epoch": 1.9790818475361505, "grad_norm": 0.4912065863609314, "learning_rate": 3.1311086992295814e-06, "loss": 0.4278, "step": 4699 }, { "epoch": 1.9795030183911273, "grad_norm": 0.4391239881515503, "learning_rate": 3.1288356487478206e-06, "loss": 0.4918, "step": 4700 }, { "epoch": 1.9799241892461041, "grad_norm": 0.3825828433036804, "learning_rate": 3.126563047871055e-06, "loss": 0.3653, "step": 4701 }, { "epoch": 1.980345360101081, "grad_norm": 0.39839741587638855, "learning_rate": 3.124290897145349e-06, "loss": 0.4349, "step": 4702 }, { "epoch": 1.9807665309560578, "grad_norm": 0.3963446021080017, "learning_rate": 3.1220191971166563e-06, "loss": 0.4571, "step": 4703 }, { "epoch": 1.9811877018110347, "grad_norm": 0.40133440494537354, "learning_rate": 3.1197479483308213e-06, "loss": 0.3955, "step": 4704 }, { "epoch": 1.9816088726660115, "grad_norm": 0.43466174602508545, "learning_rate": 3.117477151333583e-06, "loss": 0.4431, "step": 4705 }, { "epoch": 1.9820300435209883, "grad_norm": 0.38206014037132263, "learning_rate": 3.11520680667057e-06, "loss": 0.4099, "step": 4706 }, { "epoch": 1.9824512143759652, "grad_norm": 0.4223995506763458, "learning_rate": 3.1129369148873023e-06, "loss": 0.4399, "step": 4707 }, { "epoch": 1.982872385230942, "grad_norm": 0.4081000089645386, "learning_rate": 3.1106674765291943e-06, "loss": 0.4253, "step": 4708 }, { "epoch": 1.9832935560859188, "grad_norm": 0.42887628078460693, "learning_rate": 3.1083984921415453e-06, "loss": 0.4082, "step": 4709 }, { "epoch": 1.9837147269408957, "grad_norm": 0.40618953108787537, "learning_rate": 3.1061299622695494e-06, "loss": 0.4222, "step": 4710 }, { "epoch": 1.9841358977958725, "grad_norm": 0.4005729556083679, "learning_rate": 3.1038618874582914e-06, "loss": 0.4253, "step": 4711 }, { "epoch": 1.9845570686508494, "grad_norm": 0.4208661913871765, "learning_rate": 3.1015942682527466e-06, "loss": 0.4191, "step": 4712 }, { "epoch": 1.9849782395058262, "grad_norm": 0.49361157417297363, "learning_rate": 3.0993271051977823e-06, "loss": 0.4883, "step": 4713 }, { "epoch": 1.985399410360803, "grad_norm": 0.45048174262046814, "learning_rate": 3.0970603988381498e-06, "loss": 0.4461, "step": 4714 }, { "epoch": 1.9858205812157799, "grad_norm": 0.4274326264858246, "learning_rate": 3.0947941497184985e-06, "loss": 0.4559, "step": 4715 }, { "epoch": 1.9862417520707567, "grad_norm": 0.3659490942955017, "learning_rate": 3.0925283583833644e-06, "loss": 0.3648, "step": 4716 }, { "epoch": 1.9866629229257335, "grad_norm": 0.4249193072319031, "learning_rate": 3.090263025377173e-06, "loss": 0.4694, "step": 4717 }, { "epoch": 1.9870840937807104, "grad_norm": 0.3942492604255676, "learning_rate": 3.0879981512442425e-06, "loss": 0.4293, "step": 4718 }, { "epoch": 1.9875052646356872, "grad_norm": 0.3627815842628479, "learning_rate": 3.085733736528776e-06, "loss": 0.4021, "step": 4719 }, { "epoch": 1.987926435490664, "grad_norm": 0.40711015462875366, "learning_rate": 3.0834697817748706e-06, "loss": 0.4342, "step": 4720 }, { "epoch": 1.988347606345641, "grad_norm": 0.41694945096969604, "learning_rate": 3.0812062875265105e-06, "loss": 0.4626, "step": 4721 }, { "epoch": 1.9887687772006177, "grad_norm": 0.3917085826396942, "learning_rate": 3.078943254327571e-06, "loss": 0.4006, "step": 4722 }, { "epoch": 1.9891899480555946, "grad_norm": 0.43097802996635437, "learning_rate": 3.0766806827218166e-06, "loss": 0.4562, "step": 4723 }, { "epoch": 1.9896111189105714, "grad_norm": 0.40689224004745483, "learning_rate": 3.074418573252897e-06, "loss": 0.4236, "step": 4724 }, { "epoch": 1.9900322897655482, "grad_norm": 0.35745587944984436, "learning_rate": 3.072156926464356e-06, "loss": 0.39, "step": 4725 }, { "epoch": 1.990453460620525, "grad_norm": 0.457504540681839, "learning_rate": 3.0698957428996236e-06, "loss": 0.4579, "step": 4726 }, { "epoch": 1.990874631475502, "grad_norm": 0.4591740071773529, "learning_rate": 3.0676350231020172e-06, "loss": 0.4778, "step": 4727 }, { "epoch": 1.9912958023304788, "grad_norm": 0.40808501839637756, "learning_rate": 3.06537476761475e-06, "loss": 0.368, "step": 4728 }, { "epoch": 1.9917169731854556, "grad_norm": 0.4464286267757416, "learning_rate": 3.063114976980912e-06, "loss": 0.48, "step": 4729 }, { "epoch": 1.9921381440404324, "grad_norm": 0.4332718849182129, "learning_rate": 3.06085565174349e-06, "loss": 0.4366, "step": 4730 }, { "epoch": 1.9925593148954093, "grad_norm": 0.46471208333969116, "learning_rate": 3.058596792445356e-06, "loss": 0.4181, "step": 4731 }, { "epoch": 1.992980485750386, "grad_norm": 0.37194231152534485, "learning_rate": 3.0563383996292717e-06, "loss": 0.3899, "step": 4732 }, { "epoch": 1.993401656605363, "grad_norm": 0.41586509346961975, "learning_rate": 3.0540804738378883e-06, "loss": 0.4607, "step": 4733 }, { "epoch": 1.9938228274603398, "grad_norm": 0.3684447109699249, "learning_rate": 3.051823015613735e-06, "loss": 0.3851, "step": 4734 }, { "epoch": 1.9942439983153166, "grad_norm": 0.4002470076084137, "learning_rate": 3.049566025499242e-06, "loss": 0.4287, "step": 4735 }, { "epoch": 1.9946651691702935, "grad_norm": 0.41620925068855286, "learning_rate": 3.0473095040367182e-06, "loss": 0.4138, "step": 4736 }, { "epoch": 1.9950863400252703, "grad_norm": 0.4050997793674469, "learning_rate": 3.045053451768364e-06, "loss": 0.4591, "step": 4737 }, { "epoch": 1.9955075108802471, "grad_norm": 0.4251793920993805, "learning_rate": 3.0427978692362647e-06, "loss": 0.3913, "step": 4738 }, { "epoch": 1.995928681735224, "grad_norm": 0.4058772027492523, "learning_rate": 3.0405427569823935e-06, "loss": 0.4207, "step": 4739 }, { "epoch": 1.9963498525902008, "grad_norm": 0.41572076082229614, "learning_rate": 3.038288115548611e-06, "loss": 0.4719, "step": 4740 }, { "epoch": 1.9967710234451777, "grad_norm": 0.3303303122520447, "learning_rate": 3.036033945476664e-06, "loss": 0.3307, "step": 4741 }, { "epoch": 1.9971921943001543, "grad_norm": 0.4513523578643799, "learning_rate": 3.033780247308187e-06, "loss": 0.4829, "step": 4742 }, { "epoch": 1.9976133651551313, "grad_norm": 0.4591463804244995, "learning_rate": 3.0315270215847015e-06, "loss": 0.452, "step": 4743 }, { "epoch": 1.998034536010108, "grad_norm": 0.4483758509159088, "learning_rate": 3.0292742688476125e-06, "loss": 0.4547, "step": 4744 }, { "epoch": 1.998455706865085, "grad_norm": 0.4265724718570709, "learning_rate": 3.0270219896382135e-06, "loss": 0.4506, "step": 4745 }, { "epoch": 1.9988768777200616, "grad_norm": 0.4181520342826843, "learning_rate": 3.0247701844976857e-06, "loss": 0.4113, "step": 4746 }, { "epoch": 1.9992980485750387, "grad_norm": 0.4219015836715698, "learning_rate": 3.0225188539670926e-06, "loss": 0.4569, "step": 4747 }, { "epoch": 1.9997192194300153, "grad_norm": 0.3543833792209625, "learning_rate": 3.0202679985873905e-06, "loss": 0.4077, "step": 4748 }, { "epoch": 2.0001403902849924, "grad_norm": 0.8730274438858032, "learning_rate": 3.0180176188994117e-06, "loss": 0.7847, "step": 4749 }, { "epoch": 2.000561561139969, "grad_norm": 0.36871111392974854, "learning_rate": 3.01576771544388e-06, "loss": 0.3745, "step": 4750 }, { "epoch": 2.000982731994946, "grad_norm": 0.4301457107067108, "learning_rate": 3.0135182887614063e-06, "loss": 0.4382, "step": 4751 }, { "epoch": 2.0014039028499226, "grad_norm": 0.42966368794441223, "learning_rate": 3.0112693393924843e-06, "loss": 0.4051, "step": 4752 }, { "epoch": 2.0018250737048997, "grad_norm": 0.4430968165397644, "learning_rate": 3.0090208678774947e-06, "loss": 0.3833, "step": 4753 }, { "epoch": 2.0022462445598763, "grad_norm": 0.46014511585235596, "learning_rate": 3.0067728747566984e-06, "loss": 0.4018, "step": 4754 }, { "epoch": 2.0026674154148534, "grad_norm": 0.4166060984134674, "learning_rate": 3.0045253605702474e-06, "loss": 0.3624, "step": 4755 }, { "epoch": 2.00308858626983, "grad_norm": 0.4278751313686371, "learning_rate": 3.0022783258581774e-06, "loss": 0.4114, "step": 4756 }, { "epoch": 2.003509757124807, "grad_norm": 0.42676281929016113, "learning_rate": 3.0000317711604067e-06, "loss": 0.4067, "step": 4757 }, { "epoch": 2.0039309279797837, "grad_norm": 0.42065033316612244, "learning_rate": 2.9977856970167395e-06, "loss": 0.4115, "step": 4758 }, { "epoch": 2.0043520988347607, "grad_norm": 0.40905678272247314, "learning_rate": 2.9955401039668642e-06, "loss": 0.4081, "step": 4759 }, { "epoch": 2.0047732696897373, "grad_norm": 0.43615400791168213, "learning_rate": 2.993294992550354e-06, "loss": 0.4252, "step": 4760 }, { "epoch": 2.0051944405447144, "grad_norm": 0.4547983407974243, "learning_rate": 2.9910503633066667e-06, "loss": 0.4324, "step": 4761 }, { "epoch": 2.005615611399691, "grad_norm": 0.3623282015323639, "learning_rate": 2.988806216775144e-06, "loss": 0.3422, "step": 4762 }, { "epoch": 2.006036782254668, "grad_norm": 0.47121143341064453, "learning_rate": 2.9865625534950106e-06, "loss": 0.47, "step": 4763 }, { "epoch": 2.0064579531096447, "grad_norm": 0.43481987714767456, "learning_rate": 2.984319374005375e-06, "loss": 0.4081, "step": 4764 }, { "epoch": 2.0068791239646218, "grad_norm": 0.4082168936729431, "learning_rate": 2.9820766788452314e-06, "loss": 0.3498, "step": 4765 }, { "epoch": 2.0073002948195984, "grad_norm": 0.4582860767841339, "learning_rate": 2.9798344685534563e-06, "loss": 0.4386, "step": 4766 }, { "epoch": 2.0077214656745754, "grad_norm": 0.5033254623413086, "learning_rate": 2.9775927436688123e-06, "loss": 0.4261, "step": 4767 }, { "epoch": 2.008142636529552, "grad_norm": 0.4069384038448334, "learning_rate": 2.9753515047299397e-06, "loss": 0.3825, "step": 4768 }, { "epoch": 2.008563807384529, "grad_norm": 0.4382459223270416, "learning_rate": 2.973110752275366e-06, "loss": 0.4148, "step": 4769 }, { "epoch": 2.0089849782395057, "grad_norm": 0.43604546785354614, "learning_rate": 2.9708704868435005e-06, "loss": 0.4335, "step": 4770 }, { "epoch": 2.009406149094483, "grad_norm": 0.404304563999176, "learning_rate": 2.9686307089726385e-06, "loss": 0.3556, "step": 4771 }, { "epoch": 2.0098273199494594, "grad_norm": 0.45196738839149475, "learning_rate": 2.966391419200957e-06, "loss": 0.4722, "step": 4772 }, { "epoch": 2.0102484908044365, "grad_norm": 0.3844744861125946, "learning_rate": 2.964152618066508e-06, "loss": 0.3411, "step": 4773 }, { "epoch": 2.010669661659413, "grad_norm": 0.419699490070343, "learning_rate": 2.9619143061072386e-06, "loss": 0.3757, "step": 4774 }, { "epoch": 2.01109083251439, "grad_norm": 0.41417819261550903, "learning_rate": 2.9596764838609693e-06, "loss": 0.4229, "step": 4775 }, { "epoch": 2.0115120033693668, "grad_norm": 0.4506896734237671, "learning_rate": 2.9574391518654077e-06, "loss": 0.3966, "step": 4776 }, { "epoch": 2.011933174224344, "grad_norm": 0.49162477254867554, "learning_rate": 2.955202310658142e-06, "loss": 0.4635, "step": 4777 }, { "epoch": 2.0123543450793204, "grad_norm": 0.4018242359161377, "learning_rate": 2.952965960776641e-06, "loss": 0.3826, "step": 4778 }, { "epoch": 2.0127755159342975, "grad_norm": 0.406294047832489, "learning_rate": 2.950730102758257e-06, "loss": 0.3699, "step": 4779 }, { "epoch": 2.013196686789274, "grad_norm": 0.46467283368110657, "learning_rate": 2.948494737140224e-06, "loss": 0.435, "step": 4780 }, { "epoch": 2.013617857644251, "grad_norm": 0.4632906913757324, "learning_rate": 2.946259864459658e-06, "loss": 0.4101, "step": 4781 }, { "epoch": 2.014039028499228, "grad_norm": 0.4458859860897064, "learning_rate": 2.9440254852535573e-06, "loss": 0.4282, "step": 4782 }, { "epoch": 2.014460199354205, "grad_norm": 0.42191800475120544, "learning_rate": 2.9417916000587976e-06, "loss": 0.4101, "step": 4783 }, { "epoch": 2.0148813702091815, "grad_norm": 0.3705589771270752, "learning_rate": 2.9395582094121405e-06, "loss": 0.3471, "step": 4784 }, { "epoch": 2.0153025410641585, "grad_norm": 0.4382895529270172, "learning_rate": 2.937325313850227e-06, "loss": 0.4316, "step": 4785 }, { "epoch": 2.015723711919135, "grad_norm": 0.4274343252182007, "learning_rate": 2.9350929139095784e-06, "loss": 0.423, "step": 4786 }, { "epoch": 2.016144882774112, "grad_norm": 0.4526175856590271, "learning_rate": 2.9328610101266006e-06, "loss": 0.45, "step": 4787 }, { "epoch": 2.016566053629089, "grad_norm": 0.41072317957878113, "learning_rate": 2.9306296030375734e-06, "loss": 0.4015, "step": 4788 }, { "epoch": 2.016987224484066, "grad_norm": 0.4210731089115143, "learning_rate": 2.9283986931786634e-06, "loss": 0.3976, "step": 4789 }, { "epoch": 2.0174083953390425, "grad_norm": 0.4224313199520111, "learning_rate": 2.9261682810859138e-06, "loss": 0.3814, "step": 4790 }, { "epoch": 2.0178295661940195, "grad_norm": 0.43761998414993286, "learning_rate": 2.9239383672952524e-06, "loss": 0.4346, "step": 4791 }, { "epoch": 2.018250737048996, "grad_norm": 0.45108598470687866, "learning_rate": 2.9217089523424857e-06, "loss": 0.4684, "step": 4792 }, { "epoch": 2.018671907903973, "grad_norm": 0.4026690721511841, "learning_rate": 2.9194800367632946e-06, "loss": 0.3593, "step": 4793 }, { "epoch": 2.01909307875895, "grad_norm": 0.43132439255714417, "learning_rate": 2.9172516210932487e-06, "loss": 0.4139, "step": 4794 }, { "epoch": 2.019514249613927, "grad_norm": 0.3845875561237335, "learning_rate": 2.9150237058677937e-06, "loss": 0.4162, "step": 4795 }, { "epoch": 2.0199354204689035, "grad_norm": 0.367552787065506, "learning_rate": 2.9127962916222517e-06, "loss": 0.3596, "step": 4796 }, { "epoch": 2.0203565913238806, "grad_norm": 0.39213940501213074, "learning_rate": 2.9105693788918327e-06, "loss": 0.3792, "step": 4797 }, { "epoch": 2.020777762178857, "grad_norm": 0.39358869194984436, "learning_rate": 2.9083429682116182e-06, "loss": 0.3916, "step": 4798 }, { "epoch": 2.0211989330338342, "grad_norm": 0.42125630378723145, "learning_rate": 2.9061170601165704e-06, "loss": 0.3984, "step": 4799 }, { "epoch": 2.021620103888811, "grad_norm": 0.44169488549232483, "learning_rate": 2.9038916551415347e-06, "loss": 0.4255, "step": 4800 }, { "epoch": 2.022041274743788, "grad_norm": 0.40037915110588074, "learning_rate": 2.901666753821235e-06, "loss": 0.356, "step": 4801 }, { "epoch": 2.0224624455987645, "grad_norm": 0.41001108288764954, "learning_rate": 2.899442356690271e-06, "loss": 0.3987, "step": 4802 }, { "epoch": 2.0228836164537416, "grad_norm": 0.40119296312332153, "learning_rate": 2.897218464283119e-06, "loss": 0.4438, "step": 4803 }, { "epoch": 2.023304787308718, "grad_norm": 0.45993679761886597, "learning_rate": 2.894995077134145e-06, "loss": 0.4478, "step": 4804 }, { "epoch": 2.0237259581636953, "grad_norm": 0.4281328618526459, "learning_rate": 2.8927721957775793e-06, "loss": 0.3371, "step": 4805 }, { "epoch": 2.024147129018672, "grad_norm": 0.4258725345134735, "learning_rate": 2.8905498207475434e-06, "loss": 0.4365, "step": 4806 }, { "epoch": 2.024568299873649, "grad_norm": 0.3806803822517395, "learning_rate": 2.88832795257803e-06, "loss": 0.3859, "step": 4807 }, { "epoch": 2.0249894707286256, "grad_norm": 0.43976733088493347, "learning_rate": 2.8861065918029085e-06, "loss": 0.4092, "step": 4808 }, { "epoch": 2.0254106415836026, "grad_norm": 0.37666580080986023, "learning_rate": 2.8838857389559328e-06, "loss": 0.3554, "step": 4809 }, { "epoch": 2.0258318124385792, "grad_norm": 0.43733587861061096, "learning_rate": 2.8816653945707286e-06, "loss": 0.4086, "step": 4810 }, { "epoch": 2.0262529832935563, "grad_norm": 0.3965684771537781, "learning_rate": 2.8794455591808056e-06, "loss": 0.3651, "step": 4811 }, { "epoch": 2.026674154148533, "grad_norm": 0.42395973205566406, "learning_rate": 2.8772262333195465e-06, "loss": 0.3983, "step": 4812 }, { "epoch": 2.02709532500351, "grad_norm": 0.4265291094779968, "learning_rate": 2.8750074175202092e-06, "loss": 0.4139, "step": 4813 }, { "epoch": 2.0275164958584866, "grad_norm": 0.42500874400138855, "learning_rate": 2.872789112315939e-06, "loss": 0.36, "step": 4814 }, { "epoch": 2.0279376667134636, "grad_norm": 0.4013976752758026, "learning_rate": 2.870571318239746e-06, "loss": 0.3664, "step": 4815 }, { "epoch": 2.0283588375684403, "grad_norm": 0.4667796492576599, "learning_rate": 2.8683540358245266e-06, "loss": 0.5133, "step": 4816 }, { "epoch": 2.028780008423417, "grad_norm": 0.38424599170684814, "learning_rate": 2.866137265603056e-06, "loss": 0.3897, "step": 4817 }, { "epoch": 2.029201179278394, "grad_norm": 0.40748754143714905, "learning_rate": 2.8639210081079728e-06, "loss": 0.3725, "step": 4818 }, { "epoch": 2.0296223501333706, "grad_norm": 0.4032590985298157, "learning_rate": 2.861705263871805e-06, "loss": 0.4062, "step": 4819 }, { "epoch": 2.0300435209883476, "grad_norm": 0.4179128408432007, "learning_rate": 2.859490033426957e-06, "loss": 0.4035, "step": 4820 }, { "epoch": 2.0304646918433242, "grad_norm": 0.3973870277404785, "learning_rate": 2.8572753173057004e-06, "loss": 0.3964, "step": 4821 }, { "epoch": 2.0308858626983013, "grad_norm": 0.46021369099617004, "learning_rate": 2.8550611160401963e-06, "loss": 0.4604, "step": 4822 }, { "epoch": 2.031307033553278, "grad_norm": 0.395222008228302, "learning_rate": 2.8528474301624676e-06, "loss": 0.3897, "step": 4823 }, { "epoch": 2.031728204408255, "grad_norm": 0.4170689880847931, "learning_rate": 2.850634260204423e-06, "loss": 0.4282, "step": 4824 }, { "epoch": 2.0321493752632316, "grad_norm": 0.4523600935935974, "learning_rate": 2.8484216066978475e-06, "loss": 0.4437, "step": 4825 }, { "epoch": 2.0325705461182086, "grad_norm": 0.37074384093284607, "learning_rate": 2.846209470174396e-06, "loss": 0.3576, "step": 4826 }, { "epoch": 2.0329917169731853, "grad_norm": 0.4666905105113983, "learning_rate": 2.8439978511656057e-06, "loss": 0.4327, "step": 4827 }, { "epoch": 2.0334128878281623, "grad_norm": 0.3678683638572693, "learning_rate": 2.841786750202885e-06, "loss": 0.3329, "step": 4828 }, { "epoch": 2.033834058683139, "grad_norm": 0.3711704909801483, "learning_rate": 2.839576167817517e-06, "loss": 0.4107, "step": 4829 }, { "epoch": 2.034255229538116, "grad_norm": 0.42912954092025757, "learning_rate": 2.8373661045406662e-06, "loss": 0.4388, "step": 4830 }, { "epoch": 2.0346764003930926, "grad_norm": 0.3884332478046417, "learning_rate": 2.835156560903365e-06, "loss": 0.3713, "step": 4831 }, { "epoch": 2.0350975712480697, "grad_norm": 0.45761439204216003, "learning_rate": 2.8329475374365277e-06, "loss": 0.4776, "step": 4832 }, { "epoch": 2.0355187421030463, "grad_norm": 0.4333609640598297, "learning_rate": 2.8307390346709384e-06, "loss": 0.3598, "step": 4833 }, { "epoch": 2.0359399129580233, "grad_norm": 0.4486697316169739, "learning_rate": 2.8285310531372574e-06, "loss": 0.4303, "step": 4834 }, { "epoch": 2.036361083813, "grad_norm": 0.4496927559375763, "learning_rate": 2.8263235933660206e-06, "loss": 0.444, "step": 4835 }, { "epoch": 2.036782254667977, "grad_norm": 0.3771916925907135, "learning_rate": 2.824116655887642e-06, "loss": 0.3368, "step": 4836 }, { "epoch": 2.0372034255229536, "grad_norm": 0.45659059286117554, "learning_rate": 2.8219102412324028e-06, "loss": 0.4582, "step": 4837 }, { "epoch": 2.0376245963779307, "grad_norm": 0.4193681478500366, "learning_rate": 2.8197043499304634e-06, "loss": 0.3752, "step": 4838 }, { "epoch": 2.0380457672329073, "grad_norm": 0.42523717880249023, "learning_rate": 2.8174989825118553e-06, "loss": 0.4657, "step": 4839 }, { "epoch": 2.0384669380878844, "grad_norm": 0.3660210967063904, "learning_rate": 2.8152941395064876e-06, "loss": 0.3946, "step": 4840 }, { "epoch": 2.038888108942861, "grad_norm": 0.3526037633419037, "learning_rate": 2.813089821444144e-06, "loss": 0.3695, "step": 4841 }, { "epoch": 2.039309279797838, "grad_norm": 0.39316362142562866, "learning_rate": 2.810886028854478e-06, "loss": 0.4119, "step": 4842 }, { "epoch": 2.0397304506528147, "grad_norm": 0.42961645126342773, "learning_rate": 2.8086827622670164e-06, "loss": 0.396, "step": 4843 }, { "epoch": 2.0401516215077917, "grad_norm": 0.40929919481277466, "learning_rate": 2.8064800222111673e-06, "loss": 0.4522, "step": 4844 }, { "epoch": 2.0405727923627683, "grad_norm": 0.3612864911556244, "learning_rate": 2.8042778092162016e-06, "loss": 0.3227, "step": 4845 }, { "epoch": 2.0409939632177454, "grad_norm": 0.41978955268859863, "learning_rate": 2.802076123811273e-06, "loss": 0.4304, "step": 4846 }, { "epoch": 2.041415134072722, "grad_norm": 0.42307737469673157, "learning_rate": 2.7998749665254032e-06, "loss": 0.4398, "step": 4847 }, { "epoch": 2.041836304927699, "grad_norm": 0.44520995020866394, "learning_rate": 2.797674337887486e-06, "loss": 0.4103, "step": 4848 }, { "epoch": 2.0422574757826757, "grad_norm": 0.40664801001548767, "learning_rate": 2.7954742384262944e-06, "loss": 0.4001, "step": 4849 }, { "epoch": 2.0426786466376528, "grad_norm": 0.43990832567214966, "learning_rate": 2.793274668670466e-06, "loss": 0.442, "step": 4850 }, { "epoch": 2.0430998174926294, "grad_norm": 0.39959004521369934, "learning_rate": 2.791075629148519e-06, "loss": 0.3669, "step": 4851 }, { "epoch": 2.0435209883476064, "grad_norm": 0.43050533533096313, "learning_rate": 2.7888771203888386e-06, "loss": 0.4224, "step": 4852 }, { "epoch": 2.043942159202583, "grad_norm": 0.41231125593185425, "learning_rate": 2.7866791429196837e-06, "loss": 0.3897, "step": 4853 }, { "epoch": 2.04436333005756, "grad_norm": 0.3994552791118622, "learning_rate": 2.784481697269188e-06, "loss": 0.396, "step": 4854 }, { "epoch": 2.0447845009125367, "grad_norm": 0.43916428089141846, "learning_rate": 2.7822847839653537e-06, "loss": 0.4019, "step": 4855 }, { "epoch": 2.0452056717675138, "grad_norm": 0.4897913634777069, "learning_rate": 2.7800884035360578e-06, "loss": 0.455, "step": 4856 }, { "epoch": 2.0456268426224904, "grad_norm": 0.3605904281139374, "learning_rate": 2.777892556509053e-06, "loss": 0.3402, "step": 4857 }, { "epoch": 2.0460480134774675, "grad_norm": 0.389801561832428, "learning_rate": 2.7756972434119513e-06, "loss": 0.4367, "step": 4858 }, { "epoch": 2.046469184332444, "grad_norm": 0.41508209705352783, "learning_rate": 2.7735024647722482e-06, "loss": 0.4169, "step": 4859 }, { "epoch": 2.046890355187421, "grad_norm": 0.5081483125686646, "learning_rate": 2.771308221117309e-06, "loss": 0.4575, "step": 4860 }, { "epoch": 2.0473115260423977, "grad_norm": 0.41169559955596924, "learning_rate": 2.7691145129743645e-06, "loss": 0.3606, "step": 4861 }, { "epoch": 2.047732696897375, "grad_norm": 0.44729503989219666, "learning_rate": 2.7669213408705255e-06, "loss": 0.3994, "step": 4862 }, { "epoch": 2.0481538677523514, "grad_norm": 0.418405681848526, "learning_rate": 2.7647287053327664e-06, "loss": 0.4336, "step": 4863 }, { "epoch": 2.0485750386073285, "grad_norm": 0.4274282455444336, "learning_rate": 2.7625366068879343e-06, "loss": 0.4166, "step": 4864 }, { "epoch": 2.048996209462305, "grad_norm": 0.49972856044769287, "learning_rate": 2.760345046062752e-06, "loss": 0.4409, "step": 4865 }, { "epoch": 2.049417380317282, "grad_norm": 0.3804597854614258, "learning_rate": 2.758154023383808e-06, "loss": 0.3551, "step": 4866 }, { "epoch": 2.0498385511722588, "grad_norm": 0.4687478244304657, "learning_rate": 2.75596353937756e-06, "loss": 0.4572, "step": 4867 }, { "epoch": 2.050259722027236, "grad_norm": 0.4238738417625427, "learning_rate": 2.753773594570345e-06, "loss": 0.4123, "step": 4868 }, { "epoch": 2.0506808928822124, "grad_norm": 0.3835239112377167, "learning_rate": 2.75158418948836e-06, "loss": 0.4165, "step": 4869 }, { "epoch": 2.0511020637371895, "grad_norm": 0.41391241550445557, "learning_rate": 2.749395324657681e-06, "loss": 0.3796, "step": 4870 }, { "epoch": 2.051523234592166, "grad_norm": 0.4262717366218567, "learning_rate": 2.747207000604249e-06, "loss": 0.3988, "step": 4871 }, { "epoch": 2.051944405447143, "grad_norm": 0.451145201921463, "learning_rate": 2.7450192178538737e-06, "loss": 0.4411, "step": 4872 }, { "epoch": 2.05236557630212, "grad_norm": 0.40215635299682617, "learning_rate": 2.742831976932242e-06, "loss": 0.386, "step": 4873 }, { "epoch": 2.052786747157097, "grad_norm": 0.4595320224761963, "learning_rate": 2.740645278364902e-06, "loss": 0.4673, "step": 4874 }, { "epoch": 2.0532079180120735, "grad_norm": 0.35952553153038025, "learning_rate": 2.7384591226772772e-06, "loss": 0.334, "step": 4875 }, { "epoch": 2.0536290888670505, "grad_norm": 0.42096057534217834, "learning_rate": 2.736273510394663e-06, "loss": 0.4135, "step": 4876 }, { "epoch": 2.054050259722027, "grad_norm": 0.42374563217163086, "learning_rate": 2.7340884420422127e-06, "loss": 0.417, "step": 4877 }, { "epoch": 2.054471430577004, "grad_norm": 0.4555342197418213, "learning_rate": 2.7319039181449604e-06, "loss": 0.4272, "step": 4878 }, { "epoch": 2.054892601431981, "grad_norm": 0.43063515424728394, "learning_rate": 2.729719939227806e-06, "loss": 0.3591, "step": 4879 }, { "epoch": 2.055313772286958, "grad_norm": 0.4565157890319824, "learning_rate": 2.7275365058155156e-06, "loss": 0.4241, "step": 4880 }, { "epoch": 2.0557349431419345, "grad_norm": 0.42263171076774597, "learning_rate": 2.7253536184327316e-06, "loss": 0.4109, "step": 4881 }, { "epoch": 2.0561561139969116, "grad_norm": 0.35943666100502014, "learning_rate": 2.7231712776039526e-06, "loss": 0.3872, "step": 4882 }, { "epoch": 2.056577284851888, "grad_norm": 0.41124477982521057, "learning_rate": 2.7209894838535567e-06, "loss": 0.4164, "step": 4883 }, { "epoch": 2.0569984557068652, "grad_norm": 0.4516177177429199, "learning_rate": 2.71880823770579e-06, "loss": 0.4344, "step": 4884 }, { "epoch": 2.057419626561842, "grad_norm": 0.42022061347961426, "learning_rate": 2.7166275396847597e-06, "loss": 0.3792, "step": 4885 }, { "epoch": 2.057840797416819, "grad_norm": 0.4288325607776642, "learning_rate": 2.7144473903144495e-06, "loss": 0.4333, "step": 4886 }, { "epoch": 2.0582619682717955, "grad_norm": 0.3906567394733429, "learning_rate": 2.7122677901187067e-06, "loss": 0.3753, "step": 4887 }, { "epoch": 2.0586831391267726, "grad_norm": 0.46577560901641846, "learning_rate": 2.710088739621244e-06, "loss": 0.4312, "step": 4888 }, { "epoch": 2.059104309981749, "grad_norm": 0.427718847990036, "learning_rate": 2.7079102393456503e-06, "loss": 0.394, "step": 4889 }, { "epoch": 2.0595254808367263, "grad_norm": 0.4400129020214081, "learning_rate": 2.7057322898153737e-06, "loss": 0.4205, "step": 4890 }, { "epoch": 2.059946651691703, "grad_norm": 0.41357216238975525, "learning_rate": 2.703554891553738e-06, "loss": 0.3803, "step": 4891 }, { "epoch": 2.06036782254668, "grad_norm": 0.3784005641937256, "learning_rate": 2.701378045083928e-06, "loss": 0.3478, "step": 4892 }, { "epoch": 2.0607889934016566, "grad_norm": 0.39933034777641296, "learning_rate": 2.6992017509289962e-06, "loss": 0.4168, "step": 4893 }, { "epoch": 2.0612101642566336, "grad_norm": 0.4660130739212036, "learning_rate": 2.6970260096118665e-06, "loss": 0.4806, "step": 4894 }, { "epoch": 2.0616313351116102, "grad_norm": 0.3697067201137543, "learning_rate": 2.6948508216553304e-06, "loss": 0.4143, "step": 4895 }, { "epoch": 2.0620525059665873, "grad_norm": 0.36684471368789673, "learning_rate": 2.6926761875820406e-06, "loss": 0.3704, "step": 4896 }, { "epoch": 2.062473676821564, "grad_norm": 0.4316667318344116, "learning_rate": 2.6905021079145216e-06, "loss": 0.4291, "step": 4897 }, { "epoch": 2.062894847676541, "grad_norm": 0.39710724353790283, "learning_rate": 2.6883285831751603e-06, "loss": 0.3902, "step": 4898 }, { "epoch": 2.0633160185315176, "grad_norm": 0.40946516394615173, "learning_rate": 2.6861556138862155e-06, "loss": 0.4578, "step": 4899 }, { "epoch": 2.0637371893864946, "grad_norm": 0.43687137961387634, "learning_rate": 2.683983200569812e-06, "loss": 0.4146, "step": 4900 }, { "epoch": 2.0641583602414713, "grad_norm": 0.39139753580093384, "learning_rate": 2.681811343747937e-06, "loss": 0.3993, "step": 4901 }, { "epoch": 2.0645795310964483, "grad_norm": 0.4079909920692444, "learning_rate": 2.6796400439424442e-06, "loss": 0.4192, "step": 4902 }, { "epoch": 2.065000701951425, "grad_norm": 0.41649630665779114, "learning_rate": 2.6774693016750595e-06, "loss": 0.4038, "step": 4903 }, { "epoch": 2.065421872806402, "grad_norm": 0.3702976107597351, "learning_rate": 2.675299117467367e-06, "loss": 0.3613, "step": 4904 }, { "epoch": 2.0658430436613786, "grad_norm": 0.4178467392921448, "learning_rate": 2.673129491840824e-06, "loss": 0.407, "step": 4905 }, { "epoch": 2.0662642145163557, "grad_norm": 0.43251585960388184, "learning_rate": 2.6709604253167486e-06, "loss": 0.417, "step": 4906 }, { "epoch": 2.0666853853713323, "grad_norm": 0.44242745637893677, "learning_rate": 2.668791918416323e-06, "loss": 0.3761, "step": 4907 }, { "epoch": 2.0671065562263093, "grad_norm": 0.4223548173904419, "learning_rate": 2.6666239716606025e-06, "loss": 0.4106, "step": 4908 }, { "epoch": 2.067527727081286, "grad_norm": 0.4244785010814667, "learning_rate": 2.6644565855704996e-06, "loss": 0.4255, "step": 4909 }, { "epoch": 2.067948897936263, "grad_norm": 0.3762212097644806, "learning_rate": 2.6622897606667996e-06, "loss": 0.3755, "step": 4910 }, { "epoch": 2.0683700687912396, "grad_norm": 0.39279454946517944, "learning_rate": 2.6601234974701462e-06, "loss": 0.4006, "step": 4911 }, { "epoch": 2.0687912396462167, "grad_norm": 0.4252297282218933, "learning_rate": 2.65795779650105e-06, "loss": 0.4127, "step": 4912 }, { "epoch": 2.0692124105011933, "grad_norm": 0.4330768287181854, "learning_rate": 2.655792658279891e-06, "loss": 0.4053, "step": 4913 }, { "epoch": 2.06963358135617, "grad_norm": 0.4336128532886505, "learning_rate": 2.6536280833269067e-06, "loss": 0.4098, "step": 4914 }, { "epoch": 2.070054752211147, "grad_norm": 0.3811756372451782, "learning_rate": 2.651464072162205e-06, "loss": 0.3896, "step": 4915 }, { "epoch": 2.070475923066124, "grad_norm": 0.3694816529750824, "learning_rate": 2.64930062530576e-06, "loss": 0.3954, "step": 4916 }, { "epoch": 2.0708970939211007, "grad_norm": 0.3906365633010864, "learning_rate": 2.6471377432773993e-06, "loss": 0.3746, "step": 4917 }, { "epoch": 2.0713182647760773, "grad_norm": 0.4226459562778473, "learning_rate": 2.6449754265968263e-06, "loss": 0.4364, "step": 4918 }, { "epoch": 2.0717394356310543, "grad_norm": 0.37728410959243774, "learning_rate": 2.6428136757836047e-06, "loss": 0.3511, "step": 4919 }, { "epoch": 2.072160606486031, "grad_norm": 0.40027356147766113, "learning_rate": 2.6406524913571596e-06, "loss": 0.3871, "step": 4920 }, { "epoch": 2.072581777341008, "grad_norm": 0.4583442509174347, "learning_rate": 2.6384918738367854e-06, "loss": 0.434, "step": 4921 }, { "epoch": 2.0730029481959846, "grad_norm": 0.3846156895160675, "learning_rate": 2.6363318237416358e-06, "loss": 0.3974, "step": 4922 }, { "epoch": 2.0734241190509617, "grad_norm": 0.40945330262184143, "learning_rate": 2.634172341590726e-06, "loss": 0.4464, "step": 4923 }, { "epoch": 2.0738452899059383, "grad_norm": 0.3888200521469116, "learning_rate": 2.632013427902944e-06, "loss": 0.3992, "step": 4924 }, { "epoch": 2.0742664607609154, "grad_norm": 0.4027702212333679, "learning_rate": 2.629855083197031e-06, "loss": 0.3751, "step": 4925 }, { "epoch": 2.074687631615892, "grad_norm": 0.43711015582084656, "learning_rate": 2.627697307991599e-06, "loss": 0.4106, "step": 4926 }, { "epoch": 2.075108802470869, "grad_norm": 0.424129456281662, "learning_rate": 2.6255401028051197e-06, "loss": 0.3934, "step": 4927 }, { "epoch": 2.0755299733258457, "grad_norm": 0.4597075283527374, "learning_rate": 2.6233834681559247e-06, "loss": 0.4124, "step": 4928 }, { "epoch": 2.0759511441808227, "grad_norm": 0.4085974097251892, "learning_rate": 2.6212274045622167e-06, "loss": 0.4173, "step": 4929 }, { "epoch": 2.0763723150357993, "grad_norm": 0.39812585711479187, "learning_rate": 2.6190719125420526e-06, "loss": 0.3824, "step": 4930 }, { "epoch": 2.0767934858907764, "grad_norm": 0.45943230390548706, "learning_rate": 2.6169169926133595e-06, "loss": 0.4293, "step": 4931 }, { "epoch": 2.077214656745753, "grad_norm": 0.3926668167114258, "learning_rate": 2.614762645293923e-06, "loss": 0.3771, "step": 4932 }, { "epoch": 2.07763582760073, "grad_norm": 0.38863644003868103, "learning_rate": 2.6126088711013876e-06, "loss": 0.374, "step": 4933 }, { "epoch": 2.0780569984557067, "grad_norm": 0.46060168743133545, "learning_rate": 2.6104556705532664e-06, "loss": 0.4391, "step": 4934 }, { "epoch": 2.0784781693106837, "grad_norm": 0.3919086456298828, "learning_rate": 2.6083030441669342e-06, "loss": 0.3644, "step": 4935 }, { "epoch": 2.0788993401656604, "grad_norm": 0.4313916563987732, "learning_rate": 2.6061509924596253e-06, "loss": 0.3852, "step": 4936 }, { "epoch": 2.0793205110206374, "grad_norm": 0.46195298433303833, "learning_rate": 2.6039995159484334e-06, "loss": 0.413, "step": 4937 }, { "epoch": 2.079741681875614, "grad_norm": 0.4277283251285553, "learning_rate": 2.6018486151503213e-06, "loss": 0.4134, "step": 4938 }, { "epoch": 2.080162852730591, "grad_norm": 0.4172375500202179, "learning_rate": 2.5996982905821054e-06, "loss": 0.4108, "step": 4939 }, { "epoch": 2.0805840235855677, "grad_norm": 0.4383775293827057, "learning_rate": 2.597548542760472e-06, "loss": 0.4019, "step": 4940 }, { "epoch": 2.0810051944405448, "grad_norm": 0.43266046047210693, "learning_rate": 2.595399372201961e-06, "loss": 0.3906, "step": 4941 }, { "epoch": 2.0814263652955214, "grad_norm": 0.3892519772052765, "learning_rate": 2.5932507794229766e-06, "loss": 0.4033, "step": 4942 }, { "epoch": 2.0818475361504984, "grad_norm": 0.4002271890640259, "learning_rate": 2.5911027649397872e-06, "loss": 0.3921, "step": 4943 }, { "epoch": 2.082268707005475, "grad_norm": 0.4516834020614624, "learning_rate": 2.5889553292685164e-06, "loss": 0.4035, "step": 4944 }, { "epoch": 2.082689877860452, "grad_norm": 0.4972728192806244, "learning_rate": 2.5868084729251563e-06, "loss": 0.4328, "step": 4945 }, { "epoch": 2.0831110487154287, "grad_norm": 0.4313730001449585, "learning_rate": 2.5846621964255524e-06, "loss": 0.3888, "step": 4946 }, { "epoch": 2.083532219570406, "grad_norm": 0.44571587443351746, "learning_rate": 2.5825165002854124e-06, "loss": 0.4704, "step": 4947 }, { "epoch": 2.0839533904253824, "grad_norm": 0.4736853539943695, "learning_rate": 2.5803713850203095e-06, "loss": 0.4086, "step": 4948 }, { "epoch": 2.0843745612803595, "grad_norm": 0.41584154963493347, "learning_rate": 2.578226851145671e-06, "loss": 0.4198, "step": 4949 }, { "epoch": 2.084795732135336, "grad_norm": 0.4271745979785919, "learning_rate": 2.576082899176788e-06, "loss": 0.4407, "step": 4950 }, { "epoch": 2.085216902990313, "grad_norm": 0.38202834129333496, "learning_rate": 2.5739395296288163e-06, "loss": 0.374, "step": 4951 }, { "epoch": 2.0856380738452898, "grad_norm": 0.4165836572647095, "learning_rate": 2.571796743016758e-06, "loss": 0.4271, "step": 4952 }, { "epoch": 2.086059244700267, "grad_norm": 0.4066937565803528, "learning_rate": 2.5696545398554873e-06, "loss": 0.357, "step": 4953 }, { "epoch": 2.0864804155552434, "grad_norm": 0.3876607120037079, "learning_rate": 2.5675129206597377e-06, "loss": 0.3839, "step": 4954 }, { "epoch": 2.0869015864102205, "grad_norm": 0.4106100797653198, "learning_rate": 2.5653718859440945e-06, "loss": 0.3848, "step": 4955 }, { "epoch": 2.087322757265197, "grad_norm": 0.4623095393180847, "learning_rate": 2.5632314362230136e-06, "loss": 0.4412, "step": 4956 }, { "epoch": 2.087743928120174, "grad_norm": 0.44691717624664307, "learning_rate": 2.5610915720107966e-06, "loss": 0.395, "step": 4957 }, { "epoch": 2.088165098975151, "grad_norm": 0.39540231227874756, "learning_rate": 2.5589522938216156e-06, "loss": 0.3924, "step": 4958 }, { "epoch": 2.088586269830128, "grad_norm": 0.42186036705970764, "learning_rate": 2.5568136021695e-06, "loss": 0.3869, "step": 4959 }, { "epoch": 2.0890074406851045, "grad_norm": 0.3939616084098816, "learning_rate": 2.554675497568332e-06, "loss": 0.3995, "step": 4960 }, { "epoch": 2.0894286115400815, "grad_norm": 0.40989241003990173, "learning_rate": 2.552537980531863e-06, "loss": 0.4129, "step": 4961 }, { "epoch": 2.089849782395058, "grad_norm": 0.43379342555999756, "learning_rate": 2.550401051573693e-06, "loss": 0.3909, "step": 4962 }, { "epoch": 2.090270953250035, "grad_norm": 0.40355533361434937, "learning_rate": 2.5482647112072857e-06, "loss": 0.4113, "step": 4963 }, { "epoch": 2.090692124105012, "grad_norm": 0.41344961524009705, "learning_rate": 2.546128959945965e-06, "loss": 0.4023, "step": 4964 }, { "epoch": 2.091113294959989, "grad_norm": 0.4390982985496521, "learning_rate": 2.5439937983029073e-06, "loss": 0.4555, "step": 4965 }, { "epoch": 2.0915344658149655, "grad_norm": 0.46997004747390747, "learning_rate": 2.5418592267911557e-06, "loss": 0.4055, "step": 4966 }, { "epoch": 2.0919556366699426, "grad_norm": 0.43010783195495605, "learning_rate": 2.5397252459236043e-06, "loss": 0.4379, "step": 4967 }, { "epoch": 2.092376807524919, "grad_norm": 0.39762312173843384, "learning_rate": 2.537591856213007e-06, "loss": 0.3666, "step": 4968 }, { "epoch": 2.0927979783798962, "grad_norm": 0.4098013937473297, "learning_rate": 2.5354590581719795e-06, "loss": 0.413, "step": 4969 }, { "epoch": 2.093219149234873, "grad_norm": 0.47215867042541504, "learning_rate": 2.5333268523129905e-06, "loss": 0.471, "step": 4970 }, { "epoch": 2.09364032008985, "grad_norm": 0.47243815660476685, "learning_rate": 2.5311952391483665e-06, "loss": 0.4291, "step": 4971 }, { "epoch": 2.0940614909448265, "grad_norm": 0.4147830903530121, "learning_rate": 2.529064219190297e-06, "loss": 0.3819, "step": 4972 }, { "epoch": 2.0944826617998036, "grad_norm": 0.3981468081474304, "learning_rate": 2.5269337929508212e-06, "loss": 0.4488, "step": 4973 }, { "epoch": 2.09490383265478, "grad_norm": 0.38637420535087585, "learning_rate": 2.524803960941843e-06, "loss": 0.4006, "step": 4974 }, { "epoch": 2.0953250035097573, "grad_norm": 0.451362669467926, "learning_rate": 2.5226747236751224e-06, "loss": 0.3976, "step": 4975 }, { "epoch": 2.095746174364734, "grad_norm": 0.40969565510749817, "learning_rate": 2.5205460816622684e-06, "loss": 0.3829, "step": 4976 }, { "epoch": 2.096167345219711, "grad_norm": 0.399692565202713, "learning_rate": 2.518418035414756e-06, "loss": 0.3741, "step": 4977 }, { "epoch": 2.0965885160746875, "grad_norm": 0.4562074840068817, "learning_rate": 2.5162905854439156e-06, "loss": 0.4425, "step": 4978 }, { "epoch": 2.0970096869296646, "grad_norm": 0.4234004616737366, "learning_rate": 2.514163732260929e-06, "loss": 0.401, "step": 4979 }, { "epoch": 2.0974308577846412, "grad_norm": 0.39661017060279846, "learning_rate": 2.5120374763768422e-06, "loss": 0.3829, "step": 4980 }, { "epoch": 2.0978520286396183, "grad_norm": 0.4201732575893402, "learning_rate": 2.5099118183025527e-06, "loss": 0.4008, "step": 4981 }, { "epoch": 2.098273199494595, "grad_norm": 0.41003283858299255, "learning_rate": 2.5077867585488125e-06, "loss": 0.3705, "step": 4982 }, { "epoch": 2.098694370349572, "grad_norm": 0.43439674377441406, "learning_rate": 2.505662297626237e-06, "loss": 0.3625, "step": 4983 }, { "epoch": 2.0991155412045486, "grad_norm": 0.47497743368148804, "learning_rate": 2.5035384360452895e-06, "loss": 0.4316, "step": 4984 }, { "epoch": 2.0995367120595256, "grad_norm": 0.39968007802963257, "learning_rate": 2.5014151743162974e-06, "loss": 0.4026, "step": 4985 }, { "epoch": 2.0999578829145022, "grad_norm": 0.3722893297672272, "learning_rate": 2.4992925129494383e-06, "loss": 0.3594, "step": 4986 }, { "epoch": 2.1003790537694793, "grad_norm": 0.4128897488117218, "learning_rate": 2.497170452454745e-06, "loss": 0.4256, "step": 4987 }, { "epoch": 2.100800224624456, "grad_norm": 0.472626268863678, "learning_rate": 2.4950489933421107e-06, "loss": 0.4528, "step": 4988 }, { "epoch": 2.101221395479433, "grad_norm": 0.39001163840293884, "learning_rate": 2.4929281361212793e-06, "loss": 0.348, "step": 4989 }, { "epoch": 2.1016425663344096, "grad_norm": 0.40572044253349304, "learning_rate": 2.490807881301855e-06, "loss": 0.3884, "step": 4990 }, { "epoch": 2.1020637371893867, "grad_norm": 0.40927648544311523, "learning_rate": 2.488688229393293e-06, "loss": 0.4194, "step": 4991 }, { "epoch": 2.1024849080443633, "grad_norm": 0.40671658515930176, "learning_rate": 2.486569180904903e-06, "loss": 0.3839, "step": 4992 }, { "epoch": 2.1029060788993403, "grad_norm": 0.4341135323047638, "learning_rate": 2.4844507363458543e-06, "loss": 0.3728, "step": 4993 }, { "epoch": 2.103327249754317, "grad_norm": 0.48137420415878296, "learning_rate": 2.482332896225169e-06, "loss": 0.4913, "step": 4994 }, { "epoch": 2.103748420609294, "grad_norm": 0.41497495770454407, "learning_rate": 2.4802156610517236e-06, "loss": 0.3548, "step": 4995 }, { "epoch": 2.1041695914642706, "grad_norm": 0.4074244499206543, "learning_rate": 2.4780990313342456e-06, "loss": 0.366, "step": 4996 }, { "epoch": 2.1045907623192477, "grad_norm": 0.4351541996002197, "learning_rate": 2.475983007581326e-06, "loss": 0.4287, "step": 4997 }, { "epoch": 2.1050119331742243, "grad_norm": 0.42866384983062744, "learning_rate": 2.4738675903014e-06, "loss": 0.4237, "step": 4998 }, { "epoch": 2.1054331040292014, "grad_norm": 0.45597153902053833, "learning_rate": 2.471752780002765e-06, "loss": 0.4029, "step": 4999 }, { "epoch": 2.105854274884178, "grad_norm": 0.43008920550346375, "learning_rate": 2.469638577193569e-06, "loss": 0.3969, "step": 5000 }, { "epoch": 2.106275445739155, "grad_norm": 0.4212562143802643, "learning_rate": 2.4675249823818127e-06, "loss": 0.3881, "step": 5001 }, { "epoch": 2.1066966165941317, "grad_norm": 0.42144855856895447, "learning_rate": 2.4654119960753555e-06, "loss": 0.4061, "step": 5002 }, { "epoch": 2.1071177874491087, "grad_norm": 0.43265870213508606, "learning_rate": 2.4632996187819036e-06, "loss": 0.3901, "step": 5003 }, { "epoch": 2.1075389583040853, "grad_norm": 0.4415675401687622, "learning_rate": 2.4611878510090246e-06, "loss": 0.4063, "step": 5004 }, { "epoch": 2.1079601291590624, "grad_norm": 0.4104728698730469, "learning_rate": 2.4590766932641353e-06, "loss": 0.4301, "step": 5005 }, { "epoch": 2.108381300014039, "grad_norm": 0.3926234841346741, "learning_rate": 2.456966146054503e-06, "loss": 0.3509, "step": 5006 }, { "epoch": 2.108802470869016, "grad_norm": 0.43235889077186584, "learning_rate": 2.454856209887257e-06, "loss": 0.4275, "step": 5007 }, { "epoch": 2.1092236417239927, "grad_norm": 0.43162500858306885, "learning_rate": 2.4527468852693696e-06, "loss": 0.3876, "step": 5008 }, { "epoch": 2.1096448125789697, "grad_norm": 0.44599708914756775, "learning_rate": 2.4506381727076734e-06, "loss": 0.4285, "step": 5009 }, { "epoch": 2.1100659834339464, "grad_norm": 0.42517271637916565, "learning_rate": 2.4485300727088562e-06, "loss": 0.4108, "step": 5010 }, { "epoch": 2.1104871542889234, "grad_norm": 0.4462931156158447, "learning_rate": 2.4464225857794453e-06, "loss": 0.4095, "step": 5011 }, { "epoch": 2.1109083251439, "grad_norm": 0.40098899602890015, "learning_rate": 2.4443157124258347e-06, "loss": 0.4446, "step": 5012 }, { "epoch": 2.111329495998877, "grad_norm": 0.41141077876091003, "learning_rate": 2.4422094531542667e-06, "loss": 0.3865, "step": 5013 }, { "epoch": 2.1117506668538537, "grad_norm": 0.38231855630874634, "learning_rate": 2.4401038084708313e-06, "loss": 0.3732, "step": 5014 }, { "epoch": 2.1121718377088303, "grad_norm": 0.44360464811325073, "learning_rate": 2.4379987788814807e-06, "loss": 0.4126, "step": 5015 }, { "epoch": 2.1125930085638074, "grad_norm": 0.4207499623298645, "learning_rate": 2.4358943648920055e-06, "loss": 0.4524, "step": 5016 }, { "epoch": 2.1130141794187844, "grad_norm": 0.4127615988254547, "learning_rate": 2.4337905670080596e-06, "loss": 0.419, "step": 5017 }, { "epoch": 2.113435350273761, "grad_norm": 0.3912132680416107, "learning_rate": 2.431687385735148e-06, "loss": 0.3652, "step": 5018 }, { "epoch": 2.1138565211287377, "grad_norm": 0.41044723987579346, "learning_rate": 2.4295848215786204e-06, "loss": 0.4121, "step": 5019 }, { "epoch": 2.1142776919837147, "grad_norm": 0.43018582463264465, "learning_rate": 2.427482875043687e-06, "loss": 0.3869, "step": 5020 }, { "epoch": 2.1146988628386914, "grad_norm": 0.40514710545539856, "learning_rate": 2.425381546635403e-06, "loss": 0.3925, "step": 5021 }, { "epoch": 2.1151200336936684, "grad_norm": 0.4467380940914154, "learning_rate": 2.4232808368586764e-06, "loss": 0.4742, "step": 5022 }, { "epoch": 2.115541204548645, "grad_norm": 0.41179588437080383, "learning_rate": 2.4211807462182707e-06, "loss": 0.3927, "step": 5023 }, { "epoch": 2.115962375403622, "grad_norm": 0.37388888001441956, "learning_rate": 2.419081275218793e-06, "loss": 0.4058, "step": 5024 }, { "epoch": 2.1163835462585987, "grad_norm": 0.3992406129837036, "learning_rate": 2.4169824243647117e-06, "loss": 0.4206, "step": 5025 }, { "epoch": 2.1168047171135758, "grad_norm": 0.45950037240982056, "learning_rate": 2.414884194160337e-06, "loss": 0.3953, "step": 5026 }, { "epoch": 2.1172258879685524, "grad_norm": 0.43327322602272034, "learning_rate": 2.412786585109832e-06, "loss": 0.4174, "step": 5027 }, { "epoch": 2.1176470588235294, "grad_norm": 0.3933418393135071, "learning_rate": 2.4106895977172157e-06, "loss": 0.4022, "step": 5028 }, { "epoch": 2.118068229678506, "grad_norm": 0.3918895423412323, "learning_rate": 2.4085932324863508e-06, "loss": 0.3638, "step": 5029 }, { "epoch": 2.118489400533483, "grad_norm": 0.4582247734069824, "learning_rate": 2.4064974899209576e-06, "loss": 0.4024, "step": 5030 }, { "epoch": 2.1189105713884597, "grad_norm": 0.4293513894081116, "learning_rate": 2.4044023705246e-06, "loss": 0.3784, "step": 5031 }, { "epoch": 2.119331742243437, "grad_norm": 0.389858603477478, "learning_rate": 2.402307874800694e-06, "loss": 0.3621, "step": 5032 }, { "epoch": 2.1197529130984134, "grad_norm": 0.4153488874435425, "learning_rate": 2.400214003252509e-06, "loss": 0.4168, "step": 5033 }, { "epoch": 2.1201740839533905, "grad_norm": 0.47169849276542664, "learning_rate": 2.3981207563831633e-06, "loss": 0.4007, "step": 5034 }, { "epoch": 2.120595254808367, "grad_norm": 0.4849715530872345, "learning_rate": 2.396028134695623e-06, "loss": 0.4149, "step": 5035 }, { "epoch": 2.121016425663344, "grad_norm": 0.40548238158226013, "learning_rate": 2.3939361386927028e-06, "loss": 0.3969, "step": 5036 }, { "epoch": 2.1214375965183208, "grad_norm": 0.40045103430747986, "learning_rate": 2.391844768877073e-06, "loss": 0.4217, "step": 5037 }, { "epoch": 2.121858767373298, "grad_norm": 0.4193189740180969, "learning_rate": 2.389754025751246e-06, "loss": 0.3981, "step": 5038 }, { "epoch": 2.1222799382282744, "grad_norm": 0.3915969729423523, "learning_rate": 2.387663909817592e-06, "loss": 0.3673, "step": 5039 }, { "epoch": 2.1227011090832515, "grad_norm": 0.37542295455932617, "learning_rate": 2.3855744215783227e-06, "loss": 0.3573, "step": 5040 }, { "epoch": 2.123122279938228, "grad_norm": 0.405781626701355, "learning_rate": 2.3834855615355007e-06, "loss": 0.4164, "step": 5041 }, { "epoch": 2.123543450793205, "grad_norm": 0.39751556515693665, "learning_rate": 2.3813973301910432e-06, "loss": 0.4014, "step": 5042 }, { "epoch": 2.123964621648182, "grad_norm": 0.44457775354385376, "learning_rate": 2.3793097280467083e-06, "loss": 0.4352, "step": 5043 }, { "epoch": 2.124385792503159, "grad_norm": 0.45718318223953247, "learning_rate": 2.3772227556041106e-06, "loss": 0.4505, "step": 5044 }, { "epoch": 2.1248069633581355, "grad_norm": 0.4524637460708618, "learning_rate": 2.3751364133647076e-06, "loss": 0.4238, "step": 5045 }, { "epoch": 2.1252281342131125, "grad_norm": 0.4152831733226776, "learning_rate": 2.373050701829806e-06, "loss": 0.3824, "step": 5046 }, { "epoch": 2.125649305068089, "grad_norm": 0.42404109239578247, "learning_rate": 2.3709656215005655e-06, "loss": 0.4452, "step": 5047 }, { "epoch": 2.126070475923066, "grad_norm": 0.42221102118492126, "learning_rate": 2.3688811728779875e-06, "loss": 0.3923, "step": 5048 }, { "epoch": 2.126491646778043, "grad_norm": 0.4098755419254303, "learning_rate": 2.3667973564629277e-06, "loss": 0.3941, "step": 5049 }, { "epoch": 2.12691281763302, "grad_norm": 0.4009099304676056, "learning_rate": 2.36471417275609e-06, "loss": 0.4245, "step": 5050 }, { "epoch": 2.1273339884879965, "grad_norm": 0.43638044595718384, "learning_rate": 2.362631622258017e-06, "loss": 0.4005, "step": 5051 }, { "epoch": 2.1277551593429735, "grad_norm": 0.45872437953948975, "learning_rate": 2.3605497054691083e-06, "loss": 0.3755, "step": 5052 }, { "epoch": 2.12817633019795, "grad_norm": 0.4090348184108734, "learning_rate": 2.358468422889612e-06, "loss": 0.3824, "step": 5053 }, { "epoch": 2.128597501052927, "grad_norm": 0.38676154613494873, "learning_rate": 2.356387775019616e-06, "loss": 0.3904, "step": 5054 }, { "epoch": 2.129018671907904, "grad_norm": 0.4467095732688904, "learning_rate": 2.3543077623590637e-06, "loss": 0.4403, "step": 5055 }, { "epoch": 2.129439842762881, "grad_norm": 0.43531253933906555, "learning_rate": 2.352228385407741e-06, "loss": 0.3973, "step": 5056 }, { "epoch": 2.1298610136178575, "grad_norm": 0.44115087389945984, "learning_rate": 2.35014964466528e-06, "loss": 0.4301, "step": 5057 }, { "epoch": 2.1302821844728346, "grad_norm": 0.41990628838539124, "learning_rate": 2.3480715406311667e-06, "loss": 0.4085, "step": 5058 }, { "epoch": 2.130703355327811, "grad_norm": 0.45263102650642395, "learning_rate": 2.345994073804725e-06, "loss": 0.396, "step": 5059 }, { "epoch": 2.1311245261827882, "grad_norm": 0.4324159622192383, "learning_rate": 2.3439172446851353e-06, "loss": 0.439, "step": 5060 }, { "epoch": 2.131545697037765, "grad_norm": 0.47238484025001526, "learning_rate": 2.3418410537714174e-06, "loss": 0.4113, "step": 5061 }, { "epoch": 2.131966867892742, "grad_norm": 0.4725935161113739, "learning_rate": 2.339765501562439e-06, "loss": 0.4882, "step": 5062 }, { "epoch": 2.1323880387477185, "grad_norm": 0.4285915493965149, "learning_rate": 2.3376905885569185e-06, "loss": 0.42, "step": 5063 }, { "epoch": 2.1328092096026956, "grad_norm": 0.40853774547576904, "learning_rate": 2.3356163152534146e-06, "loss": 0.4007, "step": 5064 }, { "epoch": 2.133230380457672, "grad_norm": 0.4481815695762634, "learning_rate": 2.333542682150339e-06, "loss": 0.449, "step": 5065 }, { "epoch": 2.1336515513126493, "grad_norm": 0.39990535378456116, "learning_rate": 2.331469689745944e-06, "loss": 0.3555, "step": 5066 }, { "epoch": 2.134072722167626, "grad_norm": 0.4134680926799774, "learning_rate": 2.3293973385383284e-06, "loss": 0.4231, "step": 5067 }, { "epoch": 2.134493893022603, "grad_norm": 0.4203454852104187, "learning_rate": 2.3273256290254405e-06, "loss": 0.4222, "step": 5068 }, { "epoch": 2.1349150638775796, "grad_norm": 0.41797780990600586, "learning_rate": 2.3252545617050743e-06, "loss": 0.3841, "step": 5069 }, { "epoch": 2.1353362347325566, "grad_norm": 0.48255425691604614, "learning_rate": 2.323184137074866e-06, "loss": 0.436, "step": 5070 }, { "epoch": 2.1357574055875332, "grad_norm": 0.436557412147522, "learning_rate": 2.3211143556322967e-06, "loss": 0.378, "step": 5071 }, { "epoch": 2.1361785764425103, "grad_norm": 0.4769830107688904, "learning_rate": 2.3190452178746993e-06, "loss": 0.3897, "step": 5072 }, { "epoch": 2.136599747297487, "grad_norm": 0.39782431721687317, "learning_rate": 2.3169767242992443e-06, "loss": 0.3717, "step": 5073 }, { "epoch": 2.137020918152464, "grad_norm": 0.3601417541503906, "learning_rate": 2.314908875402956e-06, "loss": 0.314, "step": 5074 }, { "epoch": 2.1374420890074406, "grad_norm": 0.478488564491272, "learning_rate": 2.312841671682693e-06, "loss": 0.4298, "step": 5075 }, { "epoch": 2.1378632598624177, "grad_norm": 0.38737890124320984, "learning_rate": 2.310775113635167e-06, "loss": 0.403, "step": 5076 }, { "epoch": 2.1382844307173943, "grad_norm": 0.4364822506904602, "learning_rate": 2.308709201756935e-06, "loss": 0.4583, "step": 5077 }, { "epoch": 2.1387056015723713, "grad_norm": 0.4298275411128998, "learning_rate": 2.306643936544392e-06, "loss": 0.3936, "step": 5078 }, { "epoch": 2.139126772427348, "grad_norm": 0.43332675099372864, "learning_rate": 2.3045793184937864e-06, "loss": 0.4172, "step": 5079 }, { "epoch": 2.139547943282325, "grad_norm": 0.44595280289649963, "learning_rate": 2.302515348101203e-06, "loss": 0.4081, "step": 5080 }, { "epoch": 2.1399691141373016, "grad_norm": 0.4621932804584503, "learning_rate": 2.300452025862574e-06, "loss": 0.4506, "step": 5081 }, { "epoch": 2.1403902849922787, "grad_norm": 0.39475488662719727, "learning_rate": 2.2983893522736795e-06, "loss": 0.3683, "step": 5082 }, { "epoch": 2.1408114558472553, "grad_norm": 0.4111098051071167, "learning_rate": 2.2963273278301364e-06, "loss": 0.3958, "step": 5083 }, { "epoch": 2.1412326267022324, "grad_norm": 0.39255291223526, "learning_rate": 2.2942659530274132e-06, "loss": 0.3699, "step": 5084 }, { "epoch": 2.141653797557209, "grad_norm": 0.438984215259552, "learning_rate": 2.292205228360818e-06, "loss": 0.4078, "step": 5085 }, { "epoch": 2.142074968412186, "grad_norm": 0.46382877230644226, "learning_rate": 2.2901451543255008e-06, "loss": 0.4892, "step": 5086 }, { "epoch": 2.1424961392671626, "grad_norm": 0.43005144596099854, "learning_rate": 2.288085731416462e-06, "loss": 0.3934, "step": 5087 }, { "epoch": 2.1429173101221397, "grad_norm": 0.4458881914615631, "learning_rate": 2.2860269601285383e-06, "loss": 0.4351, "step": 5088 }, { "epoch": 2.1433384809771163, "grad_norm": 0.43319380283355713, "learning_rate": 2.283968840956416e-06, "loss": 0.4032, "step": 5089 }, { "epoch": 2.1437596518320934, "grad_norm": 0.4504721164703369, "learning_rate": 2.2819113743946205e-06, "loss": 0.3817, "step": 5090 }, { "epoch": 2.14418082268707, "grad_norm": 0.40578311681747437, "learning_rate": 2.279854560937519e-06, "loss": 0.3671, "step": 5091 }, { "epoch": 2.144601993542047, "grad_norm": 0.44761428236961365, "learning_rate": 2.2777984010793264e-06, "loss": 0.4612, "step": 5092 }, { "epoch": 2.1450231643970237, "grad_norm": 0.4143396019935608, "learning_rate": 2.2757428953141014e-06, "loss": 0.413, "step": 5093 }, { "epoch": 2.1454443352520007, "grad_norm": 0.4307277500629425, "learning_rate": 2.27368804413574e-06, "loss": 0.4282, "step": 5094 }, { "epoch": 2.1458655061069773, "grad_norm": 0.4350644052028656, "learning_rate": 2.271633848037982e-06, "loss": 0.4783, "step": 5095 }, { "epoch": 2.1462866769619544, "grad_norm": 0.37526729702949524, "learning_rate": 2.269580307514416e-06, "loss": 0.3629, "step": 5096 }, { "epoch": 2.146707847816931, "grad_norm": 0.426434725522995, "learning_rate": 2.267527423058463e-06, "loss": 0.3804, "step": 5097 }, { "epoch": 2.147129018671908, "grad_norm": 0.4441169202327728, "learning_rate": 2.265475195163397e-06, "loss": 0.4178, "step": 5098 }, { "epoch": 2.1475501895268847, "grad_norm": 0.42522355914115906, "learning_rate": 2.263423624322326e-06, "loss": 0.4374, "step": 5099 }, { "epoch": 2.1479713603818618, "grad_norm": 0.401236891746521, "learning_rate": 2.261372711028203e-06, "loss": 0.4522, "step": 5100 }, { "epoch": 2.1483925312368384, "grad_norm": 0.3755509853363037, "learning_rate": 2.259322455773826e-06, "loss": 0.3554, "step": 5101 }, { "epoch": 2.1488137020918154, "grad_norm": 0.48137983679771423, "learning_rate": 2.2572728590518286e-06, "loss": 0.4378, "step": 5102 }, { "epoch": 2.149234872946792, "grad_norm": 0.446425199508667, "learning_rate": 2.255223921354693e-06, "loss": 0.4001, "step": 5103 }, { "epoch": 2.149656043801769, "grad_norm": 0.4430813491344452, "learning_rate": 2.2531756431747375e-06, "loss": 0.4326, "step": 5104 }, { "epoch": 2.1500772146567457, "grad_norm": 0.45486488938331604, "learning_rate": 2.2511280250041236e-06, "loss": 0.4065, "step": 5105 }, { "epoch": 2.150498385511723, "grad_norm": 0.3968414068222046, "learning_rate": 2.2490810673348578e-06, "loss": 0.3625, "step": 5106 }, { "epoch": 2.1509195563666994, "grad_norm": 0.4365592300891876, "learning_rate": 2.2470347706587815e-06, "loss": 0.4478, "step": 5107 }, { "epoch": 2.1513407272216765, "grad_norm": 0.42778587341308594, "learning_rate": 2.244989135467582e-06, "loss": 0.4031, "step": 5108 }, { "epoch": 2.151761898076653, "grad_norm": 0.7581344246864319, "learning_rate": 2.24294416225279e-06, "loss": 0.4155, "step": 5109 }, { "epoch": 2.15218306893163, "grad_norm": 0.4563269019126892, "learning_rate": 2.2408998515057655e-06, "loss": 0.4047, "step": 5110 }, { "epoch": 2.1526042397866068, "grad_norm": 0.45870354771614075, "learning_rate": 2.2388562037177227e-06, "loss": 0.4894, "step": 5111 }, { "epoch": 2.1530254106415834, "grad_norm": 0.41806983947753906, "learning_rate": 2.2368132193797114e-06, "loss": 0.3761, "step": 5112 }, { "epoch": 2.1534465814965604, "grad_norm": 0.3894679546356201, "learning_rate": 2.234770898982619e-06, "loss": 0.3473, "step": 5113 }, { "epoch": 2.1538677523515375, "grad_norm": 0.4432537257671356, "learning_rate": 2.232729243017179e-06, "loss": 0.4408, "step": 5114 }, { "epoch": 2.154288923206514, "grad_norm": 0.44440487027168274, "learning_rate": 2.2306882519739603e-06, "loss": 0.4169, "step": 5115 }, { "epoch": 2.1547100940614907, "grad_norm": 0.3849603831768036, "learning_rate": 2.228647926343373e-06, "loss": 0.4031, "step": 5116 }, { "epoch": 2.155131264916468, "grad_norm": 0.4916543960571289, "learning_rate": 2.2266082666156707e-06, "loss": 0.4644, "step": 5117 }, { "epoch": 2.155552435771445, "grad_norm": 0.41857093572616577, "learning_rate": 2.2245692732809423e-06, "loss": 0.3998, "step": 5118 }, { "epoch": 2.1559736066264215, "grad_norm": 0.40283897519111633, "learning_rate": 2.2225309468291217e-06, "loss": 0.4205, "step": 5119 }, { "epoch": 2.156394777481398, "grad_norm": 0.4113680422306061, "learning_rate": 2.220493287749978e-06, "loss": 0.4262, "step": 5120 }, { "epoch": 2.156815948336375, "grad_norm": 0.39701637625694275, "learning_rate": 2.2184562965331203e-06, "loss": 0.4419, "step": 5121 }, { "epoch": 2.157237119191352, "grad_norm": 0.3588448464870453, "learning_rate": 2.216419973668002e-06, "loss": 0.3669, "step": 5122 }, { "epoch": 2.157658290046329, "grad_norm": 0.3854846656322479, "learning_rate": 2.2143843196439093e-06, "loss": 0.3686, "step": 5123 }, { "epoch": 2.1580794609013054, "grad_norm": 0.43004855513572693, "learning_rate": 2.212349334949973e-06, "loss": 0.4288, "step": 5124 }, { "epoch": 2.1585006317562825, "grad_norm": 0.3803102970123291, "learning_rate": 2.210315020075161e-06, "loss": 0.4324, "step": 5125 }, { "epoch": 2.158921802611259, "grad_norm": 0.4274151623249054, "learning_rate": 2.2082813755082778e-06, "loss": 0.4351, "step": 5126 }, { "epoch": 2.159342973466236, "grad_norm": 0.4558587670326233, "learning_rate": 2.206248401737971e-06, "loss": 0.4744, "step": 5127 }, { "epoch": 2.1597641443212128, "grad_norm": 0.4400444030761719, "learning_rate": 2.204216099252727e-06, "loss": 0.4242, "step": 5128 }, { "epoch": 2.16018531517619, "grad_norm": 0.4283547103404999, "learning_rate": 2.2021844685408676e-06, "loss": 0.3871, "step": 5129 }, { "epoch": 2.1606064860311665, "grad_norm": 0.44922420382499695, "learning_rate": 2.200153510090553e-06, "loss": 0.4401, "step": 5130 }, { "epoch": 2.1610276568861435, "grad_norm": 0.43268468976020813, "learning_rate": 2.1981232243897878e-06, "loss": 0.4111, "step": 5131 }, { "epoch": 2.16144882774112, "grad_norm": 0.38065221905708313, "learning_rate": 2.196093611926406e-06, "loss": 0.4067, "step": 5132 }, { "epoch": 2.161869998596097, "grad_norm": 0.3945420980453491, "learning_rate": 2.1940646731880887e-06, "loss": 0.3952, "step": 5133 }, { "epoch": 2.162291169451074, "grad_norm": 0.38043317198753357, "learning_rate": 2.1920364086623496e-06, "loss": 0.3482, "step": 5134 }, { "epoch": 2.162712340306051, "grad_norm": 0.4146663546562195, "learning_rate": 2.1900088188365397e-06, "loss": 0.4333, "step": 5135 }, { "epoch": 2.1631335111610275, "grad_norm": 0.3787210285663605, "learning_rate": 2.187981904197854e-06, "loss": 0.392, "step": 5136 }, { "epoch": 2.1635546820160045, "grad_norm": 0.38804882764816284, "learning_rate": 2.1859556652333177e-06, "loss": 0.3821, "step": 5137 }, { "epoch": 2.163975852870981, "grad_norm": 0.3717060983181, "learning_rate": 2.1839301024298003e-06, "loss": 0.3801, "step": 5138 }, { "epoch": 2.164397023725958, "grad_norm": 0.4253590404987335, "learning_rate": 2.181905216274005e-06, "loss": 0.3524, "step": 5139 }, { "epoch": 2.164818194580935, "grad_norm": 0.41757556796073914, "learning_rate": 2.1798810072524708e-06, "loss": 0.394, "step": 5140 }, { "epoch": 2.165239365435912, "grad_norm": 0.4119146764278412, "learning_rate": 2.1778574758515793e-06, "loss": 0.4224, "step": 5141 }, { "epoch": 2.1656605362908885, "grad_norm": 0.4022953510284424, "learning_rate": 2.1758346225575438e-06, "loss": 0.3696, "step": 5142 }, { "epoch": 2.1660817071458656, "grad_norm": 0.41630083322525024, "learning_rate": 2.1738124478564187e-06, "loss": 0.3967, "step": 5143 }, { "epoch": 2.166502878000842, "grad_norm": 0.42162224650382996, "learning_rate": 2.1717909522340978e-06, "loss": 0.4268, "step": 5144 }, { "epoch": 2.1669240488558192, "grad_norm": 0.40277647972106934, "learning_rate": 2.1697701361763002e-06, "loss": 0.3686, "step": 5145 }, { "epoch": 2.167345219710796, "grad_norm": 0.4165695011615753, "learning_rate": 2.167750000168595e-06, "loss": 0.4459, "step": 5146 }, { "epoch": 2.167766390565773, "grad_norm": 0.3917098343372345, "learning_rate": 2.1657305446963785e-06, "loss": 0.3951, "step": 5147 }, { "epoch": 2.1681875614207495, "grad_norm": 0.41845256090164185, "learning_rate": 2.1637117702448885e-06, "loss": 0.3797, "step": 5148 }, { "epoch": 2.1686087322757266, "grad_norm": 0.39974331855773926, "learning_rate": 2.1616936772992023e-06, "loss": 0.3873, "step": 5149 }, { "epoch": 2.169029903130703, "grad_norm": 0.41337525844573975, "learning_rate": 2.159676266344222e-06, "loss": 0.409, "step": 5150 }, { "epoch": 2.1694510739856803, "grad_norm": 0.43748435378074646, "learning_rate": 2.1576595378646955e-06, "loss": 0.4177, "step": 5151 }, { "epoch": 2.169872244840657, "grad_norm": 0.35888832807540894, "learning_rate": 2.155643492345206e-06, "loss": 0.3689, "step": 5152 }, { "epoch": 2.170293415695634, "grad_norm": 0.44524499773979187, "learning_rate": 2.1536281302701686e-06, "loss": 0.4246, "step": 5153 }, { "epoch": 2.1707145865506106, "grad_norm": 0.3800692856311798, "learning_rate": 2.151613452123838e-06, "loss": 0.3422, "step": 5154 }, { "epoch": 2.1711357574055876, "grad_norm": 0.4018844664096832, "learning_rate": 2.149599458390302e-06, "loss": 0.4047, "step": 5155 }, { "epoch": 2.1715569282605642, "grad_norm": 0.43955886363983154, "learning_rate": 2.1475861495534823e-06, "loss": 0.4237, "step": 5156 }, { "epoch": 2.1719780991155413, "grad_norm": 0.4080359637737274, "learning_rate": 2.145573526097142e-06, "loss": 0.3984, "step": 5157 }, { "epoch": 2.172399269970518, "grad_norm": 0.43900641798973083, "learning_rate": 2.1435615885048737e-06, "loss": 0.4024, "step": 5158 }, { "epoch": 2.172820440825495, "grad_norm": 0.5101897716522217, "learning_rate": 2.14155033726011e-06, "loss": 0.4619, "step": 5159 }, { "epoch": 2.1732416116804716, "grad_norm": 0.42131683230400085, "learning_rate": 2.1395397728461146e-06, "loss": 0.3844, "step": 5160 }, { "epoch": 2.1736627825354486, "grad_norm": 0.4409100413322449, "learning_rate": 2.137529895745986e-06, "loss": 0.4158, "step": 5161 }, { "epoch": 2.1740839533904253, "grad_norm": 0.4032442271709442, "learning_rate": 2.1355207064426625e-06, "loss": 0.4, "step": 5162 }, { "epoch": 2.1745051242454023, "grad_norm": 0.4220845103263855, "learning_rate": 2.1335122054189104e-06, "loss": 0.3736, "step": 5163 }, { "epoch": 2.174926295100379, "grad_norm": 0.4024972915649414, "learning_rate": 2.131504393157338e-06, "loss": 0.3826, "step": 5164 }, { "epoch": 2.175347465955356, "grad_norm": 0.5222643613815308, "learning_rate": 2.1294972701403815e-06, "loss": 0.4745, "step": 5165 }, { "epoch": 2.1757686368103326, "grad_norm": 0.4138134717941284, "learning_rate": 2.1274908368503133e-06, "loss": 0.3804, "step": 5166 }, { "epoch": 2.1761898076653097, "grad_norm": 0.41271787881851196, "learning_rate": 2.125485093769242e-06, "loss": 0.3945, "step": 5167 }, { "epoch": 2.1766109785202863, "grad_norm": 0.4342038929462433, "learning_rate": 2.1234800413791125e-06, "loss": 0.4252, "step": 5168 }, { "epoch": 2.1770321493752633, "grad_norm": 0.5008268356323242, "learning_rate": 2.1214756801616963e-06, "loss": 0.4479, "step": 5169 }, { "epoch": 2.17745332023024, "grad_norm": 0.4557649493217468, "learning_rate": 2.119472010598604e-06, "loss": 0.3835, "step": 5170 }, { "epoch": 2.177874491085217, "grad_norm": 0.47074463963508606, "learning_rate": 2.11746903317128e-06, "loss": 0.4233, "step": 5171 }, { "epoch": 2.1782956619401936, "grad_norm": 0.4477071762084961, "learning_rate": 2.115466748361e-06, "loss": 0.4098, "step": 5172 }, { "epoch": 2.1787168327951707, "grad_norm": 0.46382027864456177, "learning_rate": 2.113465156648877e-06, "loss": 0.4088, "step": 5173 }, { "epoch": 2.1791380036501473, "grad_norm": 0.46039408445358276, "learning_rate": 2.111464258515854e-06, "loss": 0.4808, "step": 5174 }, { "epoch": 2.1795591745051244, "grad_norm": 0.3848892152309418, "learning_rate": 2.109464054442707e-06, "loss": 0.3896, "step": 5175 }, { "epoch": 2.179980345360101, "grad_norm": 0.47365567088127136, "learning_rate": 2.1074645449100495e-06, "loss": 0.4244, "step": 5176 }, { "epoch": 2.180401516215078, "grad_norm": 0.44621458649635315, "learning_rate": 2.1054657303983223e-06, "loss": 0.4479, "step": 5177 }, { "epoch": 2.1808226870700547, "grad_norm": 0.36894017457962036, "learning_rate": 2.103467611387806e-06, "loss": 0.3323, "step": 5178 }, { "epoch": 2.1812438579250317, "grad_norm": 0.4154558777809143, "learning_rate": 2.1014701883586087e-06, "loss": 0.4082, "step": 5179 }, { "epoch": 2.1816650287800083, "grad_norm": 0.4428524076938629, "learning_rate": 2.099473461790671e-06, "loss": 0.3925, "step": 5180 }, { "epoch": 2.1820861996349854, "grad_norm": 0.4289606511592865, "learning_rate": 2.097477432163771e-06, "loss": 0.4172, "step": 5181 }, { "epoch": 2.182507370489962, "grad_norm": 0.45041078329086304, "learning_rate": 2.0954820999575136e-06, "loss": 0.4566, "step": 5182 }, { "epoch": 2.182928541344939, "grad_norm": 0.42999252676963806, "learning_rate": 2.0934874656513426e-06, "loss": 0.4305, "step": 5183 }, { "epoch": 2.1833497121999157, "grad_norm": 0.45589742064476013, "learning_rate": 2.091493529724528e-06, "loss": 0.4435, "step": 5184 }, { "epoch": 2.1837708830548928, "grad_norm": 0.42294633388519287, "learning_rate": 2.0895002926561734e-06, "loss": 0.3974, "step": 5185 }, { "epoch": 2.1841920539098694, "grad_norm": 0.40835437178611755, "learning_rate": 2.087507754925217e-06, "loss": 0.417, "step": 5186 }, { "epoch": 2.1846132247648464, "grad_norm": 0.47507235407829285, "learning_rate": 2.0855159170104293e-06, "loss": 0.4207, "step": 5187 }, { "epoch": 2.185034395619823, "grad_norm": 0.4413241446018219, "learning_rate": 2.083524779390409e-06, "loss": 0.3886, "step": 5188 }, { "epoch": 2.1854555664748, "grad_norm": 0.47596150636672974, "learning_rate": 2.081534342543587e-06, "loss": 0.4263, "step": 5189 }, { "epoch": 2.1858767373297767, "grad_norm": 0.421118825674057, "learning_rate": 2.07954460694823e-06, "loss": 0.3902, "step": 5190 }, { "epoch": 2.186297908184754, "grad_norm": 0.45684725046157837, "learning_rate": 2.07755557308243e-06, "loss": 0.4449, "step": 5191 }, { "epoch": 2.1867190790397304, "grad_norm": 0.45551976561546326, "learning_rate": 2.0755672414241184e-06, "loss": 0.4308, "step": 5192 }, { "epoch": 2.1871402498947075, "grad_norm": 0.42215102910995483, "learning_rate": 2.0735796124510506e-06, "loss": 0.3925, "step": 5193 }, { "epoch": 2.187561420749684, "grad_norm": 0.3893393278121948, "learning_rate": 2.071592686640813e-06, "loss": 0.4146, "step": 5194 }, { "epoch": 2.187982591604661, "grad_norm": 0.41675153374671936, "learning_rate": 2.069606464470832e-06, "loss": 0.4238, "step": 5195 }, { "epoch": 2.1884037624596377, "grad_norm": 0.4439897835254669, "learning_rate": 2.067620946418353e-06, "loss": 0.3852, "step": 5196 }, { "epoch": 2.188824933314615, "grad_norm": 0.3936765491962433, "learning_rate": 2.0656361329604625e-06, "loss": 0.3971, "step": 5197 }, { "epoch": 2.1892461041695914, "grad_norm": 0.48141878843307495, "learning_rate": 2.063652024574071e-06, "loss": 0.4179, "step": 5198 }, { "epoch": 2.1896672750245685, "grad_norm": 0.4280008375644684, "learning_rate": 2.06166862173592e-06, "loss": 0.4315, "step": 5199 }, { "epoch": 2.190088445879545, "grad_norm": 0.4201371371746063, "learning_rate": 2.0596859249225876e-06, "loss": 0.4149, "step": 5200 }, { "epoch": 2.190509616734522, "grad_norm": 0.4328722655773163, "learning_rate": 2.057703934610474e-06, "loss": 0.4008, "step": 5201 }, { "epoch": 2.1909307875894988, "grad_norm": 0.4292289912700653, "learning_rate": 2.0557226512758144e-06, "loss": 0.4303, "step": 5202 }, { "epoch": 2.191351958444476, "grad_norm": 0.41062456369400024, "learning_rate": 2.053742075394678e-06, "loss": 0.428, "step": 5203 }, { "epoch": 2.1917731292994524, "grad_norm": 0.4374335706233978, "learning_rate": 2.0517622074429523e-06, "loss": 0.3906, "step": 5204 }, { "epoch": 2.1921943001544295, "grad_norm": 0.40441471338272095, "learning_rate": 2.049783047896366e-06, "loss": 0.432, "step": 5205 }, { "epoch": 2.192615471009406, "grad_norm": 0.371158242225647, "learning_rate": 2.0478045972304698e-06, "loss": 0.354, "step": 5206 }, { "epoch": 2.193036641864383, "grad_norm": 0.41813135147094727, "learning_rate": 2.04582685592065e-06, "loss": 0.4307, "step": 5207 }, { "epoch": 2.19345781271936, "grad_norm": 0.39163780212402344, "learning_rate": 2.043849824442124e-06, "loss": 0.3604, "step": 5208 }, { "epoch": 2.193878983574337, "grad_norm": 0.4228566586971283, "learning_rate": 2.041873503269926e-06, "loss": 0.3974, "step": 5209 }, { "epoch": 2.1943001544293135, "grad_norm": 0.39350467920303345, "learning_rate": 2.039897892878933e-06, "loss": 0.4423, "step": 5210 }, { "epoch": 2.1947213252842905, "grad_norm": 0.3881506025791168, "learning_rate": 2.0379229937438477e-06, "loss": 0.3879, "step": 5211 }, { "epoch": 2.195142496139267, "grad_norm": 0.3787629008293152, "learning_rate": 2.035948806339197e-06, "loss": 0.3587, "step": 5212 }, { "epoch": 2.1955636669942438, "grad_norm": 0.4517948031425476, "learning_rate": 2.0339753311393436e-06, "loss": 0.4755, "step": 5213 }, { "epoch": 2.195984837849221, "grad_norm": 0.4062199890613556, "learning_rate": 2.032002568618475e-06, "loss": 0.3515, "step": 5214 }, { "epoch": 2.196406008704198, "grad_norm": 0.5095516443252563, "learning_rate": 2.030030519250605e-06, "loss": 0.4693, "step": 5215 }, { "epoch": 2.1968271795591745, "grad_norm": 0.44980329275131226, "learning_rate": 2.0280591835095847e-06, "loss": 0.4154, "step": 5216 }, { "epoch": 2.197248350414151, "grad_norm": 0.3939957916736603, "learning_rate": 2.0260885618690836e-06, "loss": 0.3921, "step": 5217 }, { "epoch": 2.197669521269128, "grad_norm": 0.4120946228504181, "learning_rate": 2.024118654802608e-06, "loss": 0.4137, "step": 5218 }, { "epoch": 2.1980906921241052, "grad_norm": 0.38247066736221313, "learning_rate": 2.0221494627834883e-06, "loss": 0.401, "step": 5219 }, { "epoch": 2.198511862979082, "grad_norm": 0.36820289492607117, "learning_rate": 2.0201809862848796e-06, "loss": 0.3741, "step": 5220 }, { "epoch": 2.1989330338340585, "grad_norm": 0.4513015151023865, "learning_rate": 2.0182132257797756e-06, "loss": 0.4348, "step": 5221 }, { "epoch": 2.1993542046890355, "grad_norm": 0.47725996375083923, "learning_rate": 2.0162461817409853e-06, "loss": 0.4097, "step": 5222 }, { "epoch": 2.1997753755440126, "grad_norm": 0.4752921760082245, "learning_rate": 2.014279854641157e-06, "loss": 0.3705, "step": 5223 }, { "epoch": 2.200196546398989, "grad_norm": 0.41797158122062683, "learning_rate": 2.012314244952758e-06, "loss": 0.3589, "step": 5224 }, { "epoch": 2.200617717253966, "grad_norm": 0.4415666162967682, "learning_rate": 2.010349353148087e-06, "loss": 0.4215, "step": 5225 }, { "epoch": 2.201038888108943, "grad_norm": 0.4304639995098114, "learning_rate": 2.008385179699271e-06, "loss": 0.3881, "step": 5226 }, { "epoch": 2.2014600589639195, "grad_norm": 0.4369891881942749, "learning_rate": 2.006421725078264e-06, "loss": 0.4204, "step": 5227 }, { "epoch": 2.2018812298188966, "grad_norm": 0.4088611602783203, "learning_rate": 2.004458989756846e-06, "loss": 0.4009, "step": 5228 }, { "epoch": 2.202302400673873, "grad_norm": 0.42852583527565, "learning_rate": 2.0024969742066232e-06, "loss": 0.4217, "step": 5229 }, { "epoch": 2.2027235715288502, "grad_norm": 0.3797823488712311, "learning_rate": 2.0005356788990325e-06, "loss": 0.3656, "step": 5230 }, { "epoch": 2.203144742383827, "grad_norm": 0.3533014953136444, "learning_rate": 1.998575104305333e-06, "loss": 0.3901, "step": 5231 }, { "epoch": 2.203565913238804, "grad_norm": 0.455768883228302, "learning_rate": 1.9966152508966167e-06, "loss": 0.4874, "step": 5232 }, { "epoch": 2.2039870840937805, "grad_norm": 0.39676594734191895, "learning_rate": 1.994656119143798e-06, "loss": 0.4141, "step": 5233 }, { "epoch": 2.2044082549487576, "grad_norm": 0.36942729353904724, "learning_rate": 1.992697709517615e-06, "loss": 0.3752, "step": 5234 }, { "epoch": 2.204829425803734, "grad_norm": 0.42324361205101013, "learning_rate": 1.990740022488642e-06, "loss": 0.4322, "step": 5235 }, { "epoch": 2.2052505966587113, "grad_norm": 0.4239000976085663, "learning_rate": 1.988783058527268e-06, "loss": 0.3763, "step": 5236 }, { "epoch": 2.205671767513688, "grad_norm": 0.4425450265407562, "learning_rate": 1.9868268181037186e-06, "loss": 0.4191, "step": 5237 }, { "epoch": 2.206092938368665, "grad_norm": 0.4159541428089142, "learning_rate": 1.9848713016880396e-06, "loss": 0.3804, "step": 5238 }, { "epoch": 2.2065141092236416, "grad_norm": 0.3812650144100189, "learning_rate": 1.982916509750101e-06, "loss": 0.3956, "step": 5239 }, { "epoch": 2.2069352800786186, "grad_norm": 0.4436025619506836, "learning_rate": 1.9809624427596074e-06, "loss": 0.4296, "step": 5240 }, { "epoch": 2.2073564509335952, "grad_norm": 0.3970624506473541, "learning_rate": 1.9790091011860786e-06, "loss": 0.367, "step": 5241 }, { "epoch": 2.2077776217885723, "grad_norm": 0.40713295340538025, "learning_rate": 1.9770564854988678e-06, "loss": 0.4157, "step": 5242 }, { "epoch": 2.208198792643549, "grad_norm": 0.4027146100997925, "learning_rate": 1.975104596167154e-06, "loss": 0.3418, "step": 5243 }, { "epoch": 2.208619963498526, "grad_norm": 0.4639572203159332, "learning_rate": 1.9731534336599322e-06, "loss": 0.4456, "step": 5244 }, { "epoch": 2.2090411343535026, "grad_norm": 0.4500899016857147, "learning_rate": 1.9712029984460335e-06, "loss": 0.3919, "step": 5245 }, { "epoch": 2.2094623052084796, "grad_norm": 0.47124356031417847, "learning_rate": 1.969253290994111e-06, "loss": 0.4237, "step": 5246 }, { "epoch": 2.2098834760634563, "grad_norm": 0.43754833936691284, "learning_rate": 1.9673043117726387e-06, "loss": 0.3841, "step": 5247 }, { "epoch": 2.2103046469184333, "grad_norm": 0.44939175248146057, "learning_rate": 1.9653560612499234e-06, "loss": 0.4106, "step": 5248 }, { "epoch": 2.21072581777341, "grad_norm": 0.39543548226356506, "learning_rate": 1.96340853989409e-06, "loss": 0.4403, "step": 5249 }, { "epoch": 2.211146988628387, "grad_norm": 0.4060085117816925, "learning_rate": 1.9614617481730886e-06, "loss": 0.3713, "step": 5250 }, { "epoch": 2.2115681594833636, "grad_norm": 0.41238877177238464, "learning_rate": 1.9595156865547007e-06, "loss": 0.437, "step": 5251 }, { "epoch": 2.2119893303383407, "grad_norm": 0.432365357875824, "learning_rate": 1.957570355506522e-06, "loss": 0.3952, "step": 5252 }, { "epoch": 2.2124105011933173, "grad_norm": 0.40811190009117126, "learning_rate": 1.9556257554959847e-06, "loss": 0.3727, "step": 5253 }, { "epoch": 2.2128316720482943, "grad_norm": 0.43271490931510925, "learning_rate": 1.9536818869903346e-06, "loss": 0.3981, "step": 5254 }, { "epoch": 2.213252842903271, "grad_norm": 0.4471178650856018, "learning_rate": 1.951738750456646e-06, "loss": 0.4472, "step": 5255 }, { "epoch": 2.213674013758248, "grad_norm": 0.42990604043006897, "learning_rate": 1.9497963463618203e-06, "loss": 0.4202, "step": 5256 }, { "epoch": 2.2140951846132246, "grad_norm": 0.407245397567749, "learning_rate": 1.9478546751725762e-06, "loss": 0.37, "step": 5257 }, { "epoch": 2.2145163554682017, "grad_norm": 0.41181543469429016, "learning_rate": 1.945913737355465e-06, "loss": 0.4414, "step": 5258 }, { "epoch": 2.2149375263231783, "grad_norm": 0.4397641718387604, "learning_rate": 1.943973533376854e-06, "loss": 0.3653, "step": 5259 }, { "epoch": 2.2153586971781554, "grad_norm": 0.4013705551624298, "learning_rate": 1.942034063702935e-06, "loss": 0.3775, "step": 5260 }, { "epoch": 2.215779868033132, "grad_norm": 0.3956470787525177, "learning_rate": 1.940095328799728e-06, "loss": 0.4402, "step": 5261 }, { "epoch": 2.216201038888109, "grad_norm": 0.4053364396095276, "learning_rate": 1.938157329133076e-06, "loss": 0.3925, "step": 5262 }, { "epoch": 2.2166222097430857, "grad_norm": 0.43281176686286926, "learning_rate": 1.936220065168641e-06, "loss": 0.3504, "step": 5263 }, { "epoch": 2.2170433805980627, "grad_norm": 0.41328689455986023, "learning_rate": 1.93428353737191e-06, "loss": 0.417, "step": 5264 }, { "epoch": 2.2174645514530393, "grad_norm": 0.3992960453033447, "learning_rate": 1.9323477462081925e-06, "loss": 0.4116, "step": 5265 }, { "epoch": 2.2178857223080164, "grad_norm": 0.390095591545105, "learning_rate": 1.9304126921426235e-06, "loss": 0.4164, "step": 5266 }, { "epoch": 2.218306893162993, "grad_norm": 0.4083558917045593, "learning_rate": 1.9284783756401615e-06, "loss": 0.3857, "step": 5267 }, { "epoch": 2.21872806401797, "grad_norm": 0.39213624596595764, "learning_rate": 1.926544797165584e-06, "loss": 0.3879, "step": 5268 }, { "epoch": 2.2191492348729467, "grad_norm": 0.4161582887172699, "learning_rate": 1.9246119571834904e-06, "loss": 0.3963, "step": 5269 }, { "epoch": 2.2195704057279237, "grad_norm": 0.4363543391227722, "learning_rate": 1.92267985615831e-06, "loss": 0.469, "step": 5270 }, { "epoch": 2.2199915765829004, "grad_norm": 0.468834787607193, "learning_rate": 1.920748494554285e-06, "loss": 0.3411, "step": 5271 }, { "epoch": 2.2204127474378774, "grad_norm": 0.3663753867149353, "learning_rate": 1.9188178728354885e-06, "loss": 0.3722, "step": 5272 }, { "epoch": 2.220833918292854, "grad_norm": 0.4742923676967621, "learning_rate": 1.9168879914658105e-06, "loss": 0.5252, "step": 5273 }, { "epoch": 2.221255089147831, "grad_norm": 0.40384209156036377, "learning_rate": 1.914958850908963e-06, "loss": 0.3762, "step": 5274 }, { "epoch": 2.2216762600028077, "grad_norm": 0.3994077146053314, "learning_rate": 1.9130304516284842e-06, "loss": 0.3729, "step": 5275 }, { "epoch": 2.2220974308577848, "grad_norm": 0.42767462134361267, "learning_rate": 1.9111027940877285e-06, "loss": 0.4373, "step": 5276 }, { "epoch": 2.2225186017127614, "grad_norm": 0.390267014503479, "learning_rate": 1.9091758787498794e-06, "loss": 0.3654, "step": 5277 }, { "epoch": 2.2229397725677384, "grad_norm": 0.3634004294872284, "learning_rate": 1.9072497060779349e-06, "loss": 0.3772, "step": 5278 }, { "epoch": 2.223360943422715, "grad_norm": 0.41059213876724243, "learning_rate": 1.9053242765347162e-06, "loss": 0.4017, "step": 5279 }, { "epoch": 2.223782114277692, "grad_norm": 0.40916359424591064, "learning_rate": 1.9033995905828705e-06, "loss": 0.4289, "step": 5280 }, { "epoch": 2.2242032851326687, "grad_norm": 0.46210217475891113, "learning_rate": 1.9014756486848601e-06, "loss": 0.3975, "step": 5281 }, { "epoch": 2.224624455987646, "grad_norm": 0.4319279193878174, "learning_rate": 1.8995524513029722e-06, "loss": 0.4143, "step": 5282 }, { "epoch": 2.2250456268426224, "grad_norm": 0.44881677627563477, "learning_rate": 1.897629998899319e-06, "loss": 0.3988, "step": 5283 }, { "epoch": 2.2254667976975995, "grad_norm": 0.3918209373950958, "learning_rate": 1.895708291935821e-06, "loss": 0.4095, "step": 5284 }, { "epoch": 2.225887968552576, "grad_norm": 0.42192342877388, "learning_rate": 1.8937873308742316e-06, "loss": 0.3789, "step": 5285 }, { "epoch": 2.226309139407553, "grad_norm": 0.4140700399875641, "learning_rate": 1.8918671161761227e-06, "loss": 0.3778, "step": 5286 }, { "epoch": 2.2267303102625298, "grad_norm": 0.42567259073257446, "learning_rate": 1.8899476483028828e-06, "loss": 0.4198, "step": 5287 }, { "epoch": 2.227151481117507, "grad_norm": 0.4346098005771637, "learning_rate": 1.8880289277157227e-06, "loss": 0.3958, "step": 5288 }, { "epoch": 2.2275726519724834, "grad_norm": 0.39970406889915466, "learning_rate": 1.8861109548756768e-06, "loss": 0.365, "step": 5289 }, { "epoch": 2.2279938228274605, "grad_norm": 0.4675781726837158, "learning_rate": 1.8841937302435948e-06, "loss": 0.477, "step": 5290 }, { "epoch": 2.228414993682437, "grad_norm": 0.4267253279685974, "learning_rate": 1.8822772542801515e-06, "loss": 0.3818, "step": 5291 }, { "epoch": 2.228836164537414, "grad_norm": 0.42855989933013916, "learning_rate": 1.880361527445838e-06, "loss": 0.4596, "step": 5292 }, { "epoch": 2.229257335392391, "grad_norm": 0.3580997586250305, "learning_rate": 1.8784465502009653e-06, "loss": 0.362, "step": 5293 }, { "epoch": 2.229678506247368, "grad_norm": 0.4222601652145386, "learning_rate": 1.8765323230056692e-06, "loss": 0.4488, "step": 5294 }, { "epoch": 2.2300996771023445, "grad_norm": 0.38030779361724854, "learning_rate": 1.8746188463198983e-06, "loss": 0.3518, "step": 5295 }, { "epoch": 2.2305208479573215, "grad_norm": 0.4105180501937866, "learning_rate": 1.872706120603428e-06, "loss": 0.4247, "step": 5296 }, { "epoch": 2.230942018812298, "grad_norm": 0.4381764829158783, "learning_rate": 1.870794146315848e-06, "loss": 0.4047, "step": 5297 }, { "epoch": 2.231363189667275, "grad_norm": 0.40396425127983093, "learning_rate": 1.8688829239165674e-06, "loss": 0.3971, "step": 5298 }, { "epoch": 2.231784360522252, "grad_norm": 0.4321228563785553, "learning_rate": 1.8669724538648194e-06, "loss": 0.4065, "step": 5299 }, { "epoch": 2.232205531377229, "grad_norm": 0.48116910457611084, "learning_rate": 1.8650627366196506e-06, "loss": 0.4321, "step": 5300 }, { "epoch": 2.2326267022322055, "grad_norm": 0.40899384021759033, "learning_rate": 1.8631537726399307e-06, "loss": 0.4205, "step": 5301 }, { "epoch": 2.2330478730871826, "grad_norm": 0.45887812972068787, "learning_rate": 1.8612455623843511e-06, "loss": 0.4612, "step": 5302 }, { "epoch": 2.233469043942159, "grad_norm": 0.3987126052379608, "learning_rate": 1.8593381063114113e-06, "loss": 0.352, "step": 5303 }, { "epoch": 2.2338902147971362, "grad_norm": 0.44202491641044617, "learning_rate": 1.85743140487944e-06, "loss": 0.379, "step": 5304 }, { "epoch": 2.234311385652113, "grad_norm": 0.4414190351963043, "learning_rate": 1.8555254585465826e-06, "loss": 0.4384, "step": 5305 }, { "epoch": 2.23473255650709, "grad_norm": 0.39965176582336426, "learning_rate": 1.8536202677707977e-06, "loss": 0.4003, "step": 5306 }, { "epoch": 2.2351537273620665, "grad_norm": 0.4520586431026459, "learning_rate": 1.85171583300987e-06, "loss": 0.3844, "step": 5307 }, { "epoch": 2.2355748982170436, "grad_norm": 0.4101162254810333, "learning_rate": 1.8498121547213977e-06, "loss": 0.3744, "step": 5308 }, { "epoch": 2.23599606907202, "grad_norm": 0.43040332198143005, "learning_rate": 1.847909233362795e-06, "loss": 0.4283, "step": 5309 }, { "epoch": 2.2364172399269973, "grad_norm": 0.4554721713066101, "learning_rate": 1.8460070693913024e-06, "loss": 0.4708, "step": 5310 }, { "epoch": 2.236838410781974, "grad_norm": 0.40619930624961853, "learning_rate": 1.8441056632639697e-06, "loss": 0.4009, "step": 5311 }, { "epoch": 2.237259581636951, "grad_norm": 0.4032803475856781, "learning_rate": 1.8422050154376715e-06, "loss": 0.4155, "step": 5312 }, { "epoch": 2.2376807524919275, "grad_norm": 0.4154336154460907, "learning_rate": 1.8403051263690958e-06, "loss": 0.3906, "step": 5313 }, { "epoch": 2.238101923346904, "grad_norm": 0.3966895639896393, "learning_rate": 1.8384059965147472e-06, "loss": 0.3821, "step": 5314 }, { "epoch": 2.2385230942018812, "grad_norm": 0.46380025148391724, "learning_rate": 1.8365076263309544e-06, "loss": 0.4174, "step": 5315 }, { "epoch": 2.2389442650568583, "grad_norm": 0.4150750935077667, "learning_rate": 1.8346100162738556e-06, "loss": 0.3665, "step": 5316 }, { "epoch": 2.239365435911835, "grad_norm": 0.5264639258384705, "learning_rate": 1.8327131667994142e-06, "loss": 0.4389, "step": 5317 }, { "epoch": 2.2397866067668115, "grad_norm": 0.3905714154243469, "learning_rate": 1.8308170783634045e-06, "loss": 0.3661, "step": 5318 }, { "epoch": 2.2402077776217886, "grad_norm": 0.4242396950721741, "learning_rate": 1.8289217514214186e-06, "loss": 0.4134, "step": 5319 }, { "epoch": 2.2406289484767656, "grad_norm": 0.4565039277076721, "learning_rate": 1.827027186428869e-06, "loss": 0.4095, "step": 5320 }, { "epoch": 2.2410501193317423, "grad_norm": 0.43600916862487793, "learning_rate": 1.8251333838409857e-06, "loss": 0.3947, "step": 5321 }, { "epoch": 2.241471290186719, "grad_norm": 0.4068264067173004, "learning_rate": 1.823240344112811e-06, "loss": 0.3821, "step": 5322 }, { "epoch": 2.241892461041696, "grad_norm": 0.3937532305717468, "learning_rate": 1.8213480676992062e-06, "loss": 0.4009, "step": 5323 }, { "epoch": 2.2423136318966725, "grad_norm": 0.4049861431121826, "learning_rate": 1.8194565550548477e-06, "loss": 0.4176, "step": 5324 }, { "epoch": 2.2427348027516496, "grad_norm": 0.3552359640598297, "learning_rate": 1.8175658066342312e-06, "loss": 0.3524, "step": 5325 }, { "epoch": 2.243155973606626, "grad_norm": 0.42365720868110657, "learning_rate": 1.8156758228916693e-06, "loss": 0.4412, "step": 5326 }, { "epoch": 2.2435771444616033, "grad_norm": 0.38276877999305725, "learning_rate": 1.8137866042812867e-06, "loss": 0.4049, "step": 5327 }, { "epoch": 2.24399831531658, "grad_norm": 0.4363619089126587, "learning_rate": 1.8118981512570255e-06, "loss": 0.438, "step": 5328 }, { "epoch": 2.244419486171557, "grad_norm": 0.41621142625808716, "learning_rate": 1.810010464272648e-06, "loss": 0.4494, "step": 5329 }, { "epoch": 2.2448406570265336, "grad_norm": 0.4033938944339752, "learning_rate": 1.8081235437817257e-06, "loss": 0.3459, "step": 5330 }, { "epoch": 2.2452618278815106, "grad_norm": 0.43256279826164246, "learning_rate": 1.8062373902376534e-06, "loss": 0.4103, "step": 5331 }, { "epoch": 2.2456829987364872, "grad_norm": 0.4316607713699341, "learning_rate": 1.8043520040936353e-06, "loss": 0.3905, "step": 5332 }, { "epoch": 2.2461041695914643, "grad_norm": 0.4907023310661316, "learning_rate": 1.8024673858026919e-06, "loss": 0.4494, "step": 5333 }, { "epoch": 2.246525340446441, "grad_norm": 0.4277940094470978, "learning_rate": 1.8005835358176649e-06, "loss": 0.3791, "step": 5334 }, { "epoch": 2.246946511301418, "grad_norm": 0.4615892469882965, "learning_rate": 1.7987004545912034e-06, "loss": 0.4434, "step": 5335 }, { "epoch": 2.2473676821563946, "grad_norm": 0.4401540756225586, "learning_rate": 1.7968181425757802e-06, "loss": 0.429, "step": 5336 }, { "epoch": 2.2477888530113717, "grad_norm": 0.4242324233055115, "learning_rate": 1.7949366002236762e-06, "loss": 0.4234, "step": 5337 }, { "epoch": 2.2482100238663483, "grad_norm": 0.4032739996910095, "learning_rate": 1.7930558279869886e-06, "loss": 0.3531, "step": 5338 }, { "epoch": 2.2486311947213253, "grad_norm": 0.4159521758556366, "learning_rate": 1.7911758263176343e-06, "loss": 0.3902, "step": 5339 }, { "epoch": 2.249052365576302, "grad_norm": 0.4431411325931549, "learning_rate": 1.7892965956673385e-06, "loss": 0.3993, "step": 5340 }, { "epoch": 2.249473536431279, "grad_norm": 0.4188850522041321, "learning_rate": 1.7874181364876464e-06, "loss": 0.4032, "step": 5341 }, { "epoch": 2.2498947072862556, "grad_norm": 0.4409734308719635, "learning_rate": 1.7855404492299188e-06, "loss": 0.4514, "step": 5342 }, { "epoch": 2.2503158781412327, "grad_norm": 0.4280811548233032, "learning_rate": 1.7836635343453212e-06, "loss": 0.3844, "step": 5343 }, { "epoch": 2.2507370489962093, "grad_norm": 0.4564816653728485, "learning_rate": 1.7817873922848439e-06, "loss": 0.4625, "step": 5344 }, { "epoch": 2.2511582198511864, "grad_norm": 0.4345380663871765, "learning_rate": 1.7799120234992889e-06, "loss": 0.3847, "step": 5345 }, { "epoch": 2.251579390706163, "grad_norm": 0.43241727352142334, "learning_rate": 1.7780374284392694e-06, "loss": 0.4601, "step": 5346 }, { "epoch": 2.25200056156114, "grad_norm": 0.3712218999862671, "learning_rate": 1.7761636075552168e-06, "loss": 0.3649, "step": 5347 }, { "epoch": 2.2524217324161167, "grad_norm": 0.4028675854206085, "learning_rate": 1.7742905612973743e-06, "loss": 0.3835, "step": 5348 }, { "epoch": 2.2528429032710937, "grad_norm": 0.48148199915885925, "learning_rate": 1.7724182901157956e-06, "loss": 0.4047, "step": 5349 }, { "epoch": 2.2532640741260703, "grad_norm": 0.410363107919693, "learning_rate": 1.770546794460356e-06, "loss": 0.4007, "step": 5350 }, { "epoch": 2.2536852449810474, "grad_norm": 0.4237666428089142, "learning_rate": 1.7686760747807369e-06, "loss": 0.4181, "step": 5351 }, { "epoch": 2.254106415836024, "grad_norm": 0.4136888384819031, "learning_rate": 1.7668061315264389e-06, "loss": 0.4407, "step": 5352 }, { "epoch": 2.254527586691001, "grad_norm": 0.42527833580970764, "learning_rate": 1.764936965146773e-06, "loss": 0.4199, "step": 5353 }, { "epoch": 2.2549487575459777, "grad_norm": 0.4695926904678345, "learning_rate": 1.7630685760908623e-06, "loss": 0.4184, "step": 5354 }, { "epoch": 2.2553699284009547, "grad_norm": 0.43643319606781006, "learning_rate": 1.7612009648076473e-06, "loss": 0.4159, "step": 5355 }, { "epoch": 2.2557910992559314, "grad_norm": 0.42491790652275085, "learning_rate": 1.7593341317458767e-06, "loss": 0.4279, "step": 5356 }, { "epoch": 2.2562122701109084, "grad_norm": 0.43402138352394104, "learning_rate": 1.757468077354118e-06, "loss": 0.4136, "step": 5357 }, { "epoch": 2.256633440965885, "grad_norm": 0.43747422099113464, "learning_rate": 1.7556028020807464e-06, "loss": 0.3891, "step": 5358 }, { "epoch": 2.257054611820862, "grad_norm": 0.4270867705345154, "learning_rate": 1.753738306373951e-06, "loss": 0.4227, "step": 5359 }, { "epoch": 2.2574757826758387, "grad_norm": 0.41820740699768066, "learning_rate": 1.7518745906817352e-06, "loss": 0.4137, "step": 5360 }, { "epoch": 2.2578969535308158, "grad_norm": 0.4258420467376709, "learning_rate": 1.7500116554519164e-06, "loss": 0.3825, "step": 5361 }, { "epoch": 2.2583181243857924, "grad_norm": 0.4512486159801483, "learning_rate": 1.748149501132121e-06, "loss": 0.4507, "step": 5362 }, { "epoch": 2.2587392952407694, "grad_norm": 0.42204782366752625, "learning_rate": 1.7462881281697857e-06, "loss": 0.3877, "step": 5363 }, { "epoch": 2.259160466095746, "grad_norm": 0.3856404721736908, "learning_rate": 1.7444275370121683e-06, "loss": 0.378, "step": 5364 }, { "epoch": 2.259581636950723, "grad_norm": 0.4197109639644623, "learning_rate": 1.7425677281063286e-06, "loss": 0.4082, "step": 5365 }, { "epoch": 2.2600028078056997, "grad_norm": 0.4256783425807953, "learning_rate": 1.7407087018991469e-06, "loss": 0.415, "step": 5366 }, { "epoch": 2.260423978660677, "grad_norm": 0.42654654383659363, "learning_rate": 1.7388504588373101e-06, "loss": 0.3835, "step": 5367 }, { "epoch": 2.2608451495156534, "grad_norm": 0.42911919951438904, "learning_rate": 1.7369929993673162e-06, "loss": 0.424, "step": 5368 }, { "epoch": 2.2612663203706305, "grad_norm": 0.3865116834640503, "learning_rate": 1.7351363239354813e-06, "loss": 0.3747, "step": 5369 }, { "epoch": 2.261687491225607, "grad_norm": 0.45803186297416687, "learning_rate": 1.733280432987925e-06, "loss": 0.4233, "step": 5370 }, { "epoch": 2.262108662080584, "grad_norm": 0.42786601185798645, "learning_rate": 1.7314253269705854e-06, "loss": 0.4103, "step": 5371 }, { "epoch": 2.2625298329355608, "grad_norm": 0.39531275629997253, "learning_rate": 1.7295710063292087e-06, "loss": 0.4151, "step": 5372 }, { "epoch": 2.262951003790538, "grad_norm": 0.40401217341423035, "learning_rate": 1.7277174715093499e-06, "loss": 0.4214, "step": 5373 }, { "epoch": 2.2633721746455144, "grad_norm": 0.45558226108551025, "learning_rate": 1.7258647229563813e-06, "loss": 0.4382, "step": 5374 }, { "epoch": 2.2637933455004915, "grad_norm": 0.3884233236312866, "learning_rate": 1.72401276111548e-06, "loss": 0.3777, "step": 5375 }, { "epoch": 2.264214516355468, "grad_norm": 0.41816940903663635, "learning_rate": 1.7221615864316389e-06, "loss": 0.4055, "step": 5376 }, { "epoch": 2.264635687210445, "grad_norm": 0.43190091848373413, "learning_rate": 1.7203111993496623e-06, "loss": 0.3903, "step": 5377 }, { "epoch": 2.265056858065422, "grad_norm": 0.445542573928833, "learning_rate": 1.7184616003141569e-06, "loss": 0.4117, "step": 5378 }, { "epoch": 2.265478028920399, "grad_norm": 0.41253042221069336, "learning_rate": 1.7166127897695488e-06, "loss": 0.4255, "step": 5379 }, { "epoch": 2.2658991997753755, "grad_norm": 0.38006308674812317, "learning_rate": 1.7147647681600737e-06, "loss": 0.381, "step": 5380 }, { "epoch": 2.2663203706303525, "grad_norm": 0.4279424250125885, "learning_rate": 1.7129175359297727e-06, "loss": 0.3877, "step": 5381 }, { "epoch": 2.266741541485329, "grad_norm": 0.42587652802467346, "learning_rate": 1.7110710935225055e-06, "loss": 0.4515, "step": 5382 }, { "epoch": 2.267162712340306, "grad_norm": 0.42531847953796387, "learning_rate": 1.7092254413819298e-06, "loss": 0.3997, "step": 5383 }, { "epoch": 2.267583883195283, "grad_norm": 0.39346885681152344, "learning_rate": 1.7073805799515237e-06, "loss": 0.434, "step": 5384 }, { "epoch": 2.26800505405026, "grad_norm": 0.37914079427719116, "learning_rate": 1.7055365096745747e-06, "loss": 0.4232, "step": 5385 }, { "epoch": 2.2684262249052365, "grad_norm": 0.3927444517612457, "learning_rate": 1.7036932309941728e-06, "loss": 0.3755, "step": 5386 }, { "epoch": 2.2688473957602135, "grad_norm": 0.4265426993370056, "learning_rate": 1.7018507443532267e-06, "loss": 0.4089, "step": 5387 }, { "epoch": 2.26926856661519, "grad_norm": 0.3495652377605438, "learning_rate": 1.70000905019445e-06, "loss": 0.3734, "step": 5388 }, { "epoch": 2.2696897374701672, "grad_norm": 0.4114154875278473, "learning_rate": 1.698168148960363e-06, "loss": 0.4066, "step": 5389 }, { "epoch": 2.270110908325144, "grad_norm": 0.3814067542552948, "learning_rate": 1.6963280410933036e-06, "loss": 0.384, "step": 5390 }, { "epoch": 2.270532079180121, "grad_norm": 0.38451138138771057, "learning_rate": 1.6944887270354127e-06, "loss": 0.3455, "step": 5391 }, { "epoch": 2.2709532500350975, "grad_norm": 0.43311816453933716, "learning_rate": 1.6926502072286405e-06, "loss": 0.4494, "step": 5392 }, { "epoch": 2.2713744208900746, "grad_norm": 0.3933102786540985, "learning_rate": 1.690812482114752e-06, "loss": 0.3926, "step": 5393 }, { "epoch": 2.271795591745051, "grad_norm": 0.42353343963623047, "learning_rate": 1.688975552135313e-06, "loss": 0.3769, "step": 5394 }, { "epoch": 2.2722167626000283, "grad_norm": 0.39656946063041687, "learning_rate": 1.6871394177317063e-06, "loss": 0.4102, "step": 5395 }, { "epoch": 2.272637933455005, "grad_norm": 0.42183518409729004, "learning_rate": 1.6853040793451186e-06, "loss": 0.3619, "step": 5396 }, { "epoch": 2.273059104309982, "grad_norm": 0.41460952162742615, "learning_rate": 1.683469537416545e-06, "loss": 0.4151, "step": 5397 }, { "epoch": 2.2734802751649585, "grad_norm": 0.45744258165359497, "learning_rate": 1.6816357923867937e-06, "loss": 0.4097, "step": 5398 }, { "epoch": 2.2739014460199356, "grad_norm": 0.44827619194984436, "learning_rate": 1.6798028446964753e-06, "loss": 0.3968, "step": 5399 }, { "epoch": 2.274322616874912, "grad_norm": 0.4427255392074585, "learning_rate": 1.6779706947860142e-06, "loss": 0.4344, "step": 5400 }, { "epoch": 2.2747437877298893, "grad_norm": 0.4293989837169647, "learning_rate": 1.6761393430956436e-06, "loss": 0.3679, "step": 5401 }, { "epoch": 2.275164958584866, "grad_norm": 0.4407489001750946, "learning_rate": 1.674308790065396e-06, "loss": 0.388, "step": 5402 }, { "epoch": 2.275586129439843, "grad_norm": 0.4214566648006439, "learning_rate": 1.6724790361351217e-06, "loss": 0.3963, "step": 5403 }, { "epoch": 2.2760073002948196, "grad_norm": 0.4080732762813568, "learning_rate": 1.670650081744477e-06, "loss": 0.3539, "step": 5404 }, { "epoch": 2.2764284711497966, "grad_norm": 0.4474928081035614, "learning_rate": 1.6688219273329215e-06, "loss": 0.4028, "step": 5405 }, { "epoch": 2.2768496420047732, "grad_norm": 0.46758997440338135, "learning_rate": 1.6669945733397292e-06, "loss": 0.4399, "step": 5406 }, { "epoch": 2.27727081285975, "grad_norm": 0.4551331400871277, "learning_rate": 1.6651680202039761e-06, "loss": 0.4631, "step": 5407 }, { "epoch": 2.277691983714727, "grad_norm": 0.38431358337402344, "learning_rate": 1.6633422683645467e-06, "loss": 0.3675, "step": 5408 }, { "epoch": 2.278113154569704, "grad_norm": 0.4420991837978363, "learning_rate": 1.6615173182601374e-06, "loss": 0.4272, "step": 5409 }, { "epoch": 2.2785343254246806, "grad_norm": 0.4522319436073303, "learning_rate": 1.6596931703292457e-06, "loss": 0.4317, "step": 5410 }, { "epoch": 2.278955496279657, "grad_norm": 0.39260709285736084, "learning_rate": 1.6578698250101828e-06, "loss": 0.3972, "step": 5411 }, { "epoch": 2.2793766671346343, "grad_norm": 0.42926982045173645, "learning_rate": 1.6560472827410618e-06, "loss": 0.3872, "step": 5412 }, { "epoch": 2.2797978379896113, "grad_norm": 0.4214784502983093, "learning_rate": 1.6542255439598027e-06, "loss": 0.373, "step": 5413 }, { "epoch": 2.280219008844588, "grad_norm": 0.4007301330566406, "learning_rate": 1.6524046091041384e-06, "loss": 0.4115, "step": 5414 }, { "epoch": 2.2806401796995646, "grad_norm": 0.43940019607543945, "learning_rate": 1.6505844786116016e-06, "loss": 0.4842, "step": 5415 }, { "epoch": 2.2810613505545416, "grad_norm": 0.4657045304775238, "learning_rate": 1.6487651529195375e-06, "loss": 0.4368, "step": 5416 }, { "epoch": 2.2814825214095187, "grad_norm": 0.41063669323921204, "learning_rate": 1.646946632465094e-06, "loss": 0.3931, "step": 5417 }, { "epoch": 2.2819036922644953, "grad_norm": 0.42626529932022095, "learning_rate": 1.6451289176852254e-06, "loss": 0.3886, "step": 5418 }, { "epoch": 2.282324863119472, "grad_norm": 0.40586236119270325, "learning_rate": 1.6433120090166942e-06, "loss": 0.4055, "step": 5419 }, { "epoch": 2.282746033974449, "grad_norm": 0.35881054401397705, "learning_rate": 1.6414959068960724e-06, "loss": 0.3518, "step": 5420 }, { "epoch": 2.283167204829426, "grad_norm": 0.4756277799606323, "learning_rate": 1.6396806117597308e-06, "loss": 0.4996, "step": 5421 }, { "epoch": 2.2835883756844026, "grad_norm": 0.4288623631000519, "learning_rate": 1.6378661240438498e-06, "loss": 0.3926, "step": 5422 }, { "epoch": 2.2840095465393793, "grad_norm": 0.45133063197135925, "learning_rate": 1.6360524441844194e-06, "loss": 0.4204, "step": 5423 }, { "epoch": 2.2844307173943563, "grad_norm": 0.4072868227958679, "learning_rate": 1.6342395726172284e-06, "loss": 0.3753, "step": 5424 }, { "epoch": 2.2848518882493334, "grad_norm": 0.4288344979286194, "learning_rate": 1.6324275097778786e-06, "loss": 0.4284, "step": 5425 }, { "epoch": 2.28527305910431, "grad_norm": 0.3827674686908722, "learning_rate": 1.6306162561017725e-06, "loss": 0.397, "step": 5426 }, { "epoch": 2.2856942299592866, "grad_norm": 0.47308093309402466, "learning_rate": 1.6288058120241175e-06, "loss": 0.4327, "step": 5427 }, { "epoch": 2.2861154008142637, "grad_norm": 0.38414138555526733, "learning_rate": 1.6269961779799327e-06, "loss": 0.3677, "step": 5428 }, { "epoch": 2.2865365716692407, "grad_norm": 0.4060676097869873, "learning_rate": 1.6251873544040347e-06, "loss": 0.4235, "step": 5429 }, { "epoch": 2.2869577425242174, "grad_norm": 0.4383370280265808, "learning_rate": 1.6233793417310529e-06, "loss": 0.384, "step": 5430 }, { "epoch": 2.287378913379194, "grad_norm": 0.47477981448173523, "learning_rate": 1.6215721403954166e-06, "loss": 0.4438, "step": 5431 }, { "epoch": 2.287800084234171, "grad_norm": 0.38268256187438965, "learning_rate": 1.6197657508313597e-06, "loss": 0.3774, "step": 5432 }, { "epoch": 2.2882212550891476, "grad_norm": 0.42980238795280457, "learning_rate": 1.617960173472926e-06, "loss": 0.3761, "step": 5433 }, { "epoch": 2.2886424259441247, "grad_norm": 0.44999608397483826, "learning_rate": 1.6161554087539594e-06, "loss": 0.4169, "step": 5434 }, { "epoch": 2.2890635967991013, "grad_norm": 0.44121894240379333, "learning_rate": 1.6143514571081097e-06, "loss": 0.4473, "step": 5435 }, { "epoch": 2.2894847676540784, "grad_norm": 0.3899534046649933, "learning_rate": 1.6125483189688373e-06, "loss": 0.3911, "step": 5436 }, { "epoch": 2.289905938509055, "grad_norm": 0.45315301418304443, "learning_rate": 1.6107459947693944e-06, "loss": 0.4437, "step": 5437 }, { "epoch": 2.290327109364032, "grad_norm": 0.39366215467453003, "learning_rate": 1.6089444849428482e-06, "loss": 0.3654, "step": 5438 }, { "epoch": 2.2907482802190087, "grad_norm": 0.38734498620033264, "learning_rate": 1.6071437899220688e-06, "loss": 0.3882, "step": 5439 }, { "epoch": 2.2911694510739857, "grad_norm": 0.45995280146598816, "learning_rate": 1.6053439101397257e-06, "loss": 0.4005, "step": 5440 }, { "epoch": 2.2915906219289623, "grad_norm": 0.3882145881652832, "learning_rate": 1.6035448460283005e-06, "loss": 0.3857, "step": 5441 }, { "epoch": 2.2920117927839394, "grad_norm": 0.4180435836315155, "learning_rate": 1.6017465980200663e-06, "loss": 0.4452, "step": 5442 }, { "epoch": 2.292432963638916, "grad_norm": 0.3750469982624054, "learning_rate": 1.5999491665471123e-06, "loss": 0.3522, "step": 5443 }, { "epoch": 2.292854134493893, "grad_norm": 0.4157281219959259, "learning_rate": 1.5981525520413283e-06, "loss": 0.4088, "step": 5444 }, { "epoch": 2.2932753053488697, "grad_norm": 0.46731242537498474, "learning_rate": 1.5963567549344028e-06, "loss": 0.4087, "step": 5445 }, { "epoch": 2.2936964762038468, "grad_norm": 0.4440303146839142, "learning_rate": 1.5945617756578347e-06, "loss": 0.3644, "step": 5446 }, { "epoch": 2.2941176470588234, "grad_norm": 0.42292332649230957, "learning_rate": 1.5927676146429216e-06, "loss": 0.397, "step": 5447 }, { "epoch": 2.2945388179138004, "grad_norm": 0.4423665702342987, "learning_rate": 1.5909742723207643e-06, "loss": 0.4384, "step": 5448 }, { "epoch": 2.294959988768777, "grad_norm": 0.4453548789024353, "learning_rate": 1.5891817491222727e-06, "loss": 0.4306, "step": 5449 }, { "epoch": 2.295381159623754, "grad_norm": 0.3647925853729248, "learning_rate": 1.5873900454781516e-06, "loss": 0.3673, "step": 5450 }, { "epoch": 2.2958023304787307, "grad_norm": 0.47252944111824036, "learning_rate": 1.5855991618189165e-06, "loss": 0.4598, "step": 5451 }, { "epoch": 2.296223501333708, "grad_norm": 0.4323665499687195, "learning_rate": 1.5838090985748816e-06, "loss": 0.4418, "step": 5452 }, { "epoch": 2.2966446721886844, "grad_norm": 0.40390846133232117, "learning_rate": 1.5820198561761624e-06, "loss": 0.39, "step": 5453 }, { "epoch": 2.2970658430436615, "grad_norm": 0.43530964851379395, "learning_rate": 1.5802314350526837e-06, "loss": 0.4279, "step": 5454 }, { "epoch": 2.297487013898638, "grad_norm": 0.40049391984939575, "learning_rate": 1.5784438356341648e-06, "loss": 0.3419, "step": 5455 }, { "epoch": 2.297908184753615, "grad_norm": 0.4841368496417999, "learning_rate": 1.576657058350135e-06, "loss": 0.4597, "step": 5456 }, { "epoch": 2.2983293556085918, "grad_norm": 0.43318989872932434, "learning_rate": 1.574871103629922e-06, "loss": 0.3762, "step": 5457 }, { "epoch": 2.298750526463569, "grad_norm": 0.5277498364448547, "learning_rate": 1.5730859719026536e-06, "loss": 0.4346, "step": 5458 }, { "epoch": 2.2991716973185454, "grad_norm": 0.46355727314949036, "learning_rate": 1.571301663597265e-06, "loss": 0.3859, "step": 5459 }, { "epoch": 2.2995928681735225, "grad_norm": 0.42413172125816345, "learning_rate": 1.5695181791424935e-06, "loss": 0.4138, "step": 5460 }, { "epoch": 2.300014039028499, "grad_norm": 0.4612480401992798, "learning_rate": 1.5677355189668736e-06, "loss": 0.4231, "step": 5461 }, { "epoch": 2.300435209883476, "grad_norm": 0.4118973910808563, "learning_rate": 1.5659536834987437e-06, "loss": 0.3874, "step": 5462 }, { "epoch": 2.3008563807384528, "grad_norm": 0.4502788484096527, "learning_rate": 1.5641726731662483e-06, "loss": 0.386, "step": 5463 }, { "epoch": 2.30127755159343, "grad_norm": 0.4748261868953705, "learning_rate": 1.5623924883973257e-06, "loss": 0.399, "step": 5464 }, { "epoch": 2.3016987224484065, "grad_norm": 0.48828694224357605, "learning_rate": 1.560613129619724e-06, "loss": 0.4537, "step": 5465 }, { "epoch": 2.3021198933033835, "grad_norm": 0.4186502993106842, "learning_rate": 1.5588345972609874e-06, "loss": 0.3828, "step": 5466 }, { "epoch": 2.30254106415836, "grad_norm": 0.4068475365638733, "learning_rate": 1.5570568917484623e-06, "loss": 0.4176, "step": 5467 }, { "epoch": 2.302962235013337, "grad_norm": 0.4517064690589905, "learning_rate": 1.5552800135093e-06, "loss": 0.4427, "step": 5468 }, { "epoch": 2.303383405868314, "grad_norm": 0.45322149991989136, "learning_rate": 1.5535039629704467e-06, "loss": 0.4752, "step": 5469 }, { "epoch": 2.303804576723291, "grad_norm": 0.41946908831596375, "learning_rate": 1.5517287405586574e-06, "loss": 0.3755, "step": 5470 }, { "epoch": 2.3042257475782675, "grad_norm": 0.39515894651412964, "learning_rate": 1.5499543467004812e-06, "loss": 0.3372, "step": 5471 }, { "epoch": 2.3046469184332445, "grad_norm": 0.43081986904144287, "learning_rate": 1.5481807818222711e-06, "loss": 0.4507, "step": 5472 }, { "epoch": 2.305068089288221, "grad_norm": 0.4445647895336151, "learning_rate": 1.546408046350183e-06, "loss": 0.3686, "step": 5473 }, { "epoch": 2.305489260143198, "grad_norm": 0.42889517545700073, "learning_rate": 1.544636140710168e-06, "loss": 0.3655, "step": 5474 }, { "epoch": 2.305910430998175, "grad_norm": 0.3704506754875183, "learning_rate": 1.5428650653279831e-06, "loss": 0.3554, "step": 5475 }, { "epoch": 2.306331601853152, "grad_norm": 0.4353524148464203, "learning_rate": 1.541094820629187e-06, "loss": 0.4693, "step": 5476 }, { "epoch": 2.3067527727081285, "grad_norm": 0.3978211283683777, "learning_rate": 1.5393254070391289e-06, "loss": 0.3589, "step": 5477 }, { "epoch": 2.3071739435631056, "grad_norm": 0.41211384534835815, "learning_rate": 1.5375568249829692e-06, "loss": 0.3711, "step": 5478 }, { "epoch": 2.307595114418082, "grad_norm": 0.41410574316978455, "learning_rate": 1.5357890748856646e-06, "loss": 0.4109, "step": 5479 }, { "epoch": 2.3080162852730592, "grad_norm": 0.4056819677352905, "learning_rate": 1.5340221571719694e-06, "loss": 0.3768, "step": 5480 }, { "epoch": 2.308437456128036, "grad_norm": 0.45377227663993835, "learning_rate": 1.5322560722664431e-06, "loss": 0.4624, "step": 5481 }, { "epoch": 2.308858626983013, "grad_norm": 0.41205281019210815, "learning_rate": 1.53049082059344e-06, "loss": 0.4002, "step": 5482 }, { "epoch": 2.3092797978379895, "grad_norm": 0.41950905323028564, "learning_rate": 1.5287264025771154e-06, "loss": 0.3686, "step": 5483 }, { "epoch": 2.3097009686929666, "grad_norm": 0.44185036420822144, "learning_rate": 1.5269628186414282e-06, "loss": 0.4569, "step": 5484 }, { "epoch": 2.310122139547943, "grad_norm": 0.43960341811180115, "learning_rate": 1.5252000692101304e-06, "loss": 0.4404, "step": 5485 }, { "epoch": 2.3105433104029203, "grad_norm": 0.4169919788837433, "learning_rate": 1.52343815470678e-06, "loss": 0.3727, "step": 5486 }, { "epoch": 2.310964481257897, "grad_norm": 0.3567759096622467, "learning_rate": 1.5216770755547305e-06, "loss": 0.3342, "step": 5487 }, { "epoch": 2.311385652112874, "grad_norm": 0.43291398882865906, "learning_rate": 1.519916832177134e-06, "loss": 0.4245, "step": 5488 }, { "epoch": 2.3118068229678506, "grad_norm": 0.4011761248111725, "learning_rate": 1.5181574249969456e-06, "loss": 0.4007, "step": 5489 }, { "epoch": 2.3122279938228276, "grad_norm": 0.4244046211242676, "learning_rate": 1.516398854436914e-06, "loss": 0.405, "step": 5490 }, { "epoch": 2.3126491646778042, "grad_norm": 0.4611516296863556, "learning_rate": 1.5146411209195944e-06, "loss": 0.4543, "step": 5491 }, { "epoch": 2.3130703355327813, "grad_norm": 0.4145626127719879, "learning_rate": 1.5128842248673337e-06, "loss": 0.3924, "step": 5492 }, { "epoch": 2.313491506387758, "grad_norm": 0.48494184017181396, "learning_rate": 1.5111281667022798e-06, "loss": 0.4305, "step": 5493 }, { "epoch": 2.313912677242735, "grad_norm": 0.46522510051727295, "learning_rate": 1.5093729468463807e-06, "loss": 0.4292, "step": 5494 }, { "epoch": 2.3143338480977116, "grad_norm": 0.40096521377563477, "learning_rate": 1.5076185657213854e-06, "loss": 0.3497, "step": 5495 }, { "epoch": 2.3147550189526886, "grad_norm": 0.44094347953796387, "learning_rate": 1.5058650237488325e-06, "loss": 0.3968, "step": 5496 }, { "epoch": 2.3151761898076653, "grad_norm": 0.4283604621887207, "learning_rate": 1.5041123213500675e-06, "loss": 0.3498, "step": 5497 }, { "epoch": 2.3155973606626423, "grad_norm": 0.3863462209701538, "learning_rate": 1.502360458946232e-06, "loss": 0.4073, "step": 5498 }, { "epoch": 2.316018531517619, "grad_norm": 0.44523903727531433, "learning_rate": 1.5006094369582624e-06, "loss": 0.445, "step": 5499 }, { "epoch": 2.316439702372596, "grad_norm": 0.43440476059913635, "learning_rate": 1.498859255806901e-06, "loss": 0.3937, "step": 5500 }, { "epoch": 2.3168608732275726, "grad_norm": 0.4039941430091858, "learning_rate": 1.4971099159126762e-06, "loss": 0.3653, "step": 5501 }, { "epoch": 2.3172820440825497, "grad_norm": 0.3445957899093628, "learning_rate": 1.4953614176959236e-06, "loss": 0.4031, "step": 5502 }, { "epoch": 2.3177032149375263, "grad_norm": 0.4293925166130066, "learning_rate": 1.4936137615767759e-06, "loss": 0.4159, "step": 5503 }, { "epoch": 2.3181243857925034, "grad_norm": 0.36166277527809143, "learning_rate": 1.4918669479751585e-06, "loss": 0.3598, "step": 5504 }, { "epoch": 2.31854555664748, "grad_norm": 0.48388198018074036, "learning_rate": 1.4901209773108e-06, "loss": 0.4555, "step": 5505 }, { "epoch": 2.318966727502457, "grad_norm": 0.4040488600730896, "learning_rate": 1.4883758500032224e-06, "loss": 0.3681, "step": 5506 }, { "epoch": 2.3193878983574336, "grad_norm": 0.41896963119506836, "learning_rate": 1.486631566471745e-06, "loss": 0.4344, "step": 5507 }, { "epoch": 2.3198090692124103, "grad_norm": 0.45450347661972046, "learning_rate": 1.4848881271354882e-06, "loss": 0.4237, "step": 5508 }, { "epoch": 2.3202302400673873, "grad_norm": 0.414223849773407, "learning_rate": 1.4831455324133648e-06, "loss": 0.3718, "step": 5509 }, { "epoch": 2.3206514109223644, "grad_norm": 0.41491398215293884, "learning_rate": 1.4814037827240896e-06, "loss": 0.3919, "step": 5510 }, { "epoch": 2.321072581777341, "grad_norm": 0.4174526631832123, "learning_rate": 1.4796628784861705e-06, "loss": 0.3999, "step": 5511 }, { "epoch": 2.3214937526323176, "grad_norm": 0.4711069166660309, "learning_rate": 1.4779228201179113e-06, "loss": 0.4658, "step": 5512 }, { "epoch": 2.3219149234872947, "grad_norm": 0.4024253785610199, "learning_rate": 1.4761836080374187e-06, "loss": 0.3874, "step": 5513 }, { "epoch": 2.3223360943422717, "grad_norm": 0.3930884599685669, "learning_rate": 1.4744452426625882e-06, "loss": 0.3662, "step": 5514 }, { "epoch": 2.3227572651972483, "grad_norm": 0.4373299777507782, "learning_rate": 1.47270772441112e-06, "loss": 0.4945, "step": 5515 }, { "epoch": 2.323178436052225, "grad_norm": 0.3611724078655243, "learning_rate": 1.4709710537005033e-06, "loss": 0.2989, "step": 5516 }, { "epoch": 2.323599606907202, "grad_norm": 0.4267372488975525, "learning_rate": 1.4692352309480268e-06, "loss": 0.4499, "step": 5517 }, { "epoch": 2.324020777762179, "grad_norm": 0.40056514739990234, "learning_rate": 1.4675002565707757e-06, "loss": 0.4081, "step": 5518 }, { "epoch": 2.3244419486171557, "grad_norm": 0.3942815363407135, "learning_rate": 1.4657661309856342e-06, "loss": 0.3988, "step": 5519 }, { "epoch": 2.3248631194721323, "grad_norm": 0.4066673517227173, "learning_rate": 1.464032854609277e-06, "loss": 0.3682, "step": 5520 }, { "epoch": 2.3252842903271094, "grad_norm": 0.4411332905292511, "learning_rate": 1.4623004278581754e-06, "loss": 0.4014, "step": 5521 }, { "epoch": 2.3257054611820864, "grad_norm": 0.42745542526245117, "learning_rate": 1.4605688511486022e-06, "loss": 0.4138, "step": 5522 }, { "epoch": 2.326126632037063, "grad_norm": 0.4615003764629364, "learning_rate": 1.4588381248966188e-06, "loss": 0.4373, "step": 5523 }, { "epoch": 2.3265478028920397, "grad_norm": 0.38901832699775696, "learning_rate": 1.457108249518089e-06, "loss": 0.3689, "step": 5524 }, { "epoch": 2.3269689737470167, "grad_norm": 0.428190678358078, "learning_rate": 1.4553792254286669e-06, "loss": 0.4855, "step": 5525 }, { "epoch": 2.327390144601994, "grad_norm": 0.4155168831348419, "learning_rate": 1.4536510530438026e-06, "loss": 0.3975, "step": 5526 }, { "epoch": 2.3278113154569704, "grad_norm": 0.40195155143737793, "learning_rate": 1.451923732778745e-06, "loss": 0.4144, "step": 5527 }, { "epoch": 2.328232486311947, "grad_norm": 0.45777687430381775, "learning_rate": 1.4501972650485342e-06, "loss": 0.4007, "step": 5528 }, { "epoch": 2.328653657166924, "grad_norm": 0.4151974320411682, "learning_rate": 1.4484716502680102e-06, "loss": 0.4049, "step": 5529 }, { "epoch": 2.329074828021901, "grad_norm": 0.3847988247871399, "learning_rate": 1.4467468888518033e-06, "loss": 0.368, "step": 5530 }, { "epoch": 2.3294959988768777, "grad_norm": 0.39123156666755676, "learning_rate": 1.4450229812143395e-06, "loss": 0.4138, "step": 5531 }, { "epoch": 2.3299171697318544, "grad_norm": 0.41254597902297974, "learning_rate": 1.443299927769844e-06, "loss": 0.4235, "step": 5532 }, { "epoch": 2.3303383405868314, "grad_norm": 0.46229398250579834, "learning_rate": 1.4415777289323296e-06, "loss": 0.4379, "step": 5533 }, { "epoch": 2.330759511441808, "grad_norm": 0.4405566155910492, "learning_rate": 1.4398563851156105e-06, "loss": 0.4205, "step": 5534 }, { "epoch": 2.331180682296785, "grad_norm": 0.39382699131965637, "learning_rate": 1.4381358967332949e-06, "loss": 0.3732, "step": 5535 }, { "epoch": 2.3316018531517617, "grad_norm": 0.40256619453430176, "learning_rate": 1.4364162641987777e-06, "loss": 0.4161, "step": 5536 }, { "epoch": 2.3320230240067388, "grad_norm": 0.4689268469810486, "learning_rate": 1.4346974879252561e-06, "loss": 0.5149, "step": 5537 }, { "epoch": 2.3324441948617154, "grad_norm": 0.38406509160995483, "learning_rate": 1.4329795683257213e-06, "loss": 0.3605, "step": 5538 }, { "epoch": 2.3328653657166925, "grad_norm": 0.4408702254295349, "learning_rate": 1.4312625058129532e-06, "loss": 0.3819, "step": 5539 }, { "epoch": 2.333286536571669, "grad_norm": 0.41907790303230286, "learning_rate": 1.429546300799532e-06, "loss": 0.4049, "step": 5540 }, { "epoch": 2.333707707426646, "grad_norm": 0.5186551809310913, "learning_rate": 1.4278309536978275e-06, "loss": 0.4208, "step": 5541 }, { "epoch": 2.3341288782816227, "grad_norm": 0.42376986145973206, "learning_rate": 1.4261164649200031e-06, "loss": 0.3914, "step": 5542 }, { "epoch": 2.3345500491366, "grad_norm": 0.41305869817733765, "learning_rate": 1.424402834878021e-06, "loss": 0.4076, "step": 5543 }, { "epoch": 2.3349712199915764, "grad_norm": 0.4198814034461975, "learning_rate": 1.4226900639836306e-06, "loss": 0.4135, "step": 5544 }, { "epoch": 2.3353923908465535, "grad_norm": 0.3862283229827881, "learning_rate": 1.4209781526483813e-06, "loss": 0.4157, "step": 5545 }, { "epoch": 2.33581356170153, "grad_norm": 0.37761685252189636, "learning_rate": 1.4192671012836107e-06, "loss": 0.3849, "step": 5546 }, { "epoch": 2.336234732556507, "grad_norm": 0.3887862265110016, "learning_rate": 1.41755691030045e-06, "loss": 0.4076, "step": 5547 }, { "epoch": 2.3366559034114838, "grad_norm": 0.4306923449039459, "learning_rate": 1.4158475801098287e-06, "loss": 0.4085, "step": 5548 }, { "epoch": 2.337077074266461, "grad_norm": 0.38409504294395447, "learning_rate": 1.4141391111224634e-06, "loss": 0.4143, "step": 5549 }, { "epoch": 2.3374982451214374, "grad_norm": 0.43598076701164246, "learning_rate": 1.4124315037488695e-06, "loss": 0.3814, "step": 5550 }, { "epoch": 2.3379194159764145, "grad_norm": 0.42448171973228455, "learning_rate": 1.4107247583993505e-06, "loss": 0.4006, "step": 5551 }, { "epoch": 2.338340586831391, "grad_norm": 0.4323934018611908, "learning_rate": 1.4090188754840028e-06, "loss": 0.4042, "step": 5552 }, { "epoch": 2.338761757686368, "grad_norm": 0.4468924403190613, "learning_rate": 1.4073138554127198e-06, "loss": 0.464, "step": 5553 }, { "epoch": 2.339182928541345, "grad_norm": 0.3856155276298523, "learning_rate": 1.405609698595186e-06, "loss": 0.3695, "step": 5554 }, { "epoch": 2.339604099396322, "grad_norm": 0.3978240191936493, "learning_rate": 1.403906405440877e-06, "loss": 0.4347, "step": 5555 }, { "epoch": 2.3400252702512985, "grad_norm": 0.38714414834976196, "learning_rate": 1.4022039763590595e-06, "loss": 0.3813, "step": 5556 }, { "epoch": 2.3404464411062755, "grad_norm": 0.4138546586036682, "learning_rate": 1.4005024117587972e-06, "loss": 0.4283, "step": 5557 }, { "epoch": 2.340867611961252, "grad_norm": 0.44460514187812805, "learning_rate": 1.3988017120489417e-06, "loss": 0.4288, "step": 5558 }, { "epoch": 2.341288782816229, "grad_norm": 0.4453619420528412, "learning_rate": 1.3971018776381407e-06, "loss": 0.4312, "step": 5559 }, { "epoch": 2.341709953671206, "grad_norm": 0.43773356080055237, "learning_rate": 1.3954029089348304e-06, "loss": 0.4253, "step": 5560 }, { "epoch": 2.342131124526183, "grad_norm": 0.4006829559803009, "learning_rate": 1.3937048063472392e-06, "loss": 0.3683, "step": 5561 }, { "epoch": 2.3425522953811595, "grad_norm": 0.42461085319519043, "learning_rate": 1.392007570283392e-06, "loss": 0.4299, "step": 5562 }, { "epoch": 2.3429734662361366, "grad_norm": 0.42872217297554016, "learning_rate": 1.3903112011510983e-06, "loss": 0.3918, "step": 5563 }, { "epoch": 2.343394637091113, "grad_norm": 0.41748568415641785, "learning_rate": 1.388615699357967e-06, "loss": 0.4338, "step": 5564 }, { "epoch": 2.3438158079460902, "grad_norm": 0.39494869112968445, "learning_rate": 1.3869210653113923e-06, "loss": 0.3755, "step": 5565 }, { "epoch": 2.344236978801067, "grad_norm": 0.45707079768180847, "learning_rate": 1.3852272994185623e-06, "loss": 0.4606, "step": 5566 }, { "epoch": 2.344658149656044, "grad_norm": 0.43793776631355286, "learning_rate": 1.3835344020864584e-06, "loss": 0.4181, "step": 5567 }, { "epoch": 2.3450793205110205, "grad_norm": 0.43323424458503723, "learning_rate": 1.3818423737218484e-06, "loss": 0.4092, "step": 5568 }, { "epoch": 2.3455004913659976, "grad_norm": 0.4402366578578949, "learning_rate": 1.3801512147312967e-06, "loss": 0.423, "step": 5569 }, { "epoch": 2.345921662220974, "grad_norm": 0.4327864944934845, "learning_rate": 1.3784609255211585e-06, "loss": 0.3993, "step": 5570 }, { "epoch": 2.3463428330759513, "grad_norm": 0.3925985097885132, "learning_rate": 1.3767715064975729e-06, "loss": 0.4005, "step": 5571 }, { "epoch": 2.346764003930928, "grad_norm": 0.3819129765033722, "learning_rate": 1.375082958066478e-06, "loss": 0.3949, "step": 5572 }, { "epoch": 2.347185174785905, "grad_norm": 0.3716142773628235, "learning_rate": 1.3733952806335987e-06, "loss": 0.4036, "step": 5573 }, { "epoch": 2.3476063456408816, "grad_norm": 0.37480777502059937, "learning_rate": 1.3717084746044511e-06, "loss": 0.3921, "step": 5574 }, { "epoch": 2.3480275164958586, "grad_norm": 0.4097791910171509, "learning_rate": 1.370022540384347e-06, "loss": 0.3937, "step": 5575 }, { "epoch": 2.3484486873508352, "grad_norm": 0.4091678261756897, "learning_rate": 1.3683374783783776e-06, "loss": 0.4489, "step": 5576 }, { "epoch": 2.3488698582058123, "grad_norm": 0.42679256200790405, "learning_rate": 1.3666532889914336e-06, "loss": 0.3784, "step": 5577 }, { "epoch": 2.349291029060789, "grad_norm": 0.44232574105262756, "learning_rate": 1.3649699726281956e-06, "loss": 0.459, "step": 5578 }, { "epoch": 2.349712199915766, "grad_norm": 0.40091609954833984, "learning_rate": 1.3632875296931287e-06, "loss": 0.4122, "step": 5579 }, { "epoch": 2.3501333707707426, "grad_norm": 0.41784417629241943, "learning_rate": 1.3616059605904953e-06, "loss": 0.3815, "step": 5580 }, { "epoch": 2.3505545416257196, "grad_norm": 0.4560774862766266, "learning_rate": 1.3599252657243427e-06, "loss": 0.4292, "step": 5581 }, { "epoch": 2.3509757124806963, "grad_norm": 0.3934994339942932, "learning_rate": 1.3582454454985083e-06, "loss": 0.3611, "step": 5582 }, { "epoch": 2.3513968833356733, "grad_norm": 0.408246785402298, "learning_rate": 1.3565665003166233e-06, "loss": 0.3814, "step": 5583 }, { "epoch": 2.35181805419065, "grad_norm": 0.41889646649360657, "learning_rate": 1.3548884305821032e-06, "loss": 0.4483, "step": 5584 }, { "epoch": 2.352239225045627, "grad_norm": 0.43661418557167053, "learning_rate": 1.3532112366981598e-06, "loss": 0.4212, "step": 5585 }, { "epoch": 2.3526603959006036, "grad_norm": 0.4788654148578644, "learning_rate": 1.3515349190677879e-06, "loss": 0.39, "step": 5586 }, { "epoch": 2.3530815667555807, "grad_norm": 0.41531744599342346, "learning_rate": 1.3498594780937745e-06, "loss": 0.4246, "step": 5587 }, { "epoch": 2.3535027376105573, "grad_norm": 0.40838900208473206, "learning_rate": 1.3481849141786979e-06, "loss": 0.3901, "step": 5588 }, { "epoch": 2.3539239084655343, "grad_norm": 0.4207264184951782, "learning_rate": 1.3465112277249214e-06, "loss": 0.3696, "step": 5589 }, { "epoch": 2.354345079320511, "grad_norm": 0.46230897307395935, "learning_rate": 1.3448384191346026e-06, "loss": 0.4268, "step": 5590 }, { "epoch": 2.354766250175488, "grad_norm": 0.38155415654182434, "learning_rate": 1.343166488809684e-06, "loss": 0.3666, "step": 5591 }, { "epoch": 2.3551874210304646, "grad_norm": 0.4335622489452362, "learning_rate": 1.3414954371518968e-06, "loss": 0.4357, "step": 5592 }, { "epoch": 2.3556085918854417, "grad_norm": 0.3783186972141266, "learning_rate": 1.339825264562764e-06, "loss": 0.3835, "step": 5593 }, { "epoch": 2.3560297627404183, "grad_norm": 0.3685537278652191, "learning_rate": 1.3381559714435988e-06, "loss": 0.3561, "step": 5594 }, { "epoch": 2.3564509335953954, "grad_norm": 0.4427870213985443, "learning_rate": 1.3364875581954973e-06, "loss": 0.446, "step": 5595 }, { "epoch": 2.356872104450372, "grad_norm": 0.37462329864501953, "learning_rate": 1.3348200252193466e-06, "loss": 0.3391, "step": 5596 }, { "epoch": 2.357293275305349, "grad_norm": 0.3841214179992676, "learning_rate": 1.3331533729158263e-06, "loss": 0.4125, "step": 5597 }, { "epoch": 2.3577144461603257, "grad_norm": 0.4668005704879761, "learning_rate": 1.3314876016853978e-06, "loss": 0.4479, "step": 5598 }, { "epoch": 2.3581356170153027, "grad_norm": 0.42758768796920776, "learning_rate": 1.3298227119283164e-06, "loss": 0.441, "step": 5599 }, { "epoch": 2.3585567878702793, "grad_norm": 0.3923018276691437, "learning_rate": 1.3281587040446225e-06, "loss": 0.3756, "step": 5600 }, { "epoch": 2.3589779587252564, "grad_norm": 0.4480264186859131, "learning_rate": 1.3264955784341437e-06, "loss": 0.4251, "step": 5601 }, { "epoch": 2.359399129580233, "grad_norm": 0.41321277618408203, "learning_rate": 1.3248333354965003e-06, "loss": 0.3973, "step": 5602 }, { "epoch": 2.35982030043521, "grad_norm": 0.42859694361686707, "learning_rate": 1.3231719756310946e-06, "loss": 0.3968, "step": 5603 }, { "epoch": 2.3602414712901867, "grad_norm": 0.4397719204425812, "learning_rate": 1.3215114992371219e-06, "loss": 0.379, "step": 5604 }, { "epoch": 2.3606626421451637, "grad_norm": 0.3651455044746399, "learning_rate": 1.3198519067135622e-06, "loss": 0.3768, "step": 5605 }, { "epoch": 2.3610838130001404, "grad_norm": 0.419436514377594, "learning_rate": 1.3181931984591823e-06, "loss": 0.43, "step": 5606 }, { "epoch": 2.3615049838551174, "grad_norm": 0.40943437814712524, "learning_rate": 1.31653537487254e-06, "loss": 0.3923, "step": 5607 }, { "epoch": 2.361926154710094, "grad_norm": 0.4453161358833313, "learning_rate": 1.3148784363519774e-06, "loss": 0.4174, "step": 5608 }, { "epoch": 2.3623473255650707, "grad_norm": 0.47827962040901184, "learning_rate": 1.3132223832956265e-06, "loss": 0.4608, "step": 5609 }, { "epoch": 2.3627684964200477, "grad_norm": 0.41883695125579834, "learning_rate": 1.3115672161014042e-06, "loss": 0.3917, "step": 5610 }, { "epoch": 2.3631896672750248, "grad_norm": 0.39846307039260864, "learning_rate": 1.3099129351670143e-06, "loss": 0.372, "step": 5611 }, { "epoch": 2.3636108381300014, "grad_norm": 0.403603732585907, "learning_rate": 1.30825954088995e-06, "loss": 0.3794, "step": 5612 }, { "epoch": 2.364032008984978, "grad_norm": 0.46380123496055603, "learning_rate": 1.3066070336674924e-06, "loss": 0.4187, "step": 5613 }, { "epoch": 2.364453179839955, "grad_norm": 0.4623165428638458, "learning_rate": 1.3049554138967052e-06, "loss": 0.4395, "step": 5614 }, { "epoch": 2.364874350694932, "grad_norm": 0.4142037630081177, "learning_rate": 1.3033046819744398e-06, "loss": 0.3666, "step": 5615 }, { "epoch": 2.3652955215499087, "grad_norm": 0.40834683179855347, "learning_rate": 1.3016548382973387e-06, "loss": 0.3521, "step": 5616 }, { "epoch": 2.3657166924048854, "grad_norm": 0.4385419487953186, "learning_rate": 1.300005883261824e-06, "loss": 0.4025, "step": 5617 }, { "epoch": 2.3661378632598624, "grad_norm": 0.40691861510276794, "learning_rate": 1.298357817264112e-06, "loss": 0.4359, "step": 5618 }, { "epoch": 2.3665590341148395, "grad_norm": 0.3630817234516144, "learning_rate": 1.2967106407001994e-06, "loss": 0.3705, "step": 5619 }, { "epoch": 2.366980204969816, "grad_norm": 0.452479749917984, "learning_rate": 1.2950643539658696e-06, "loss": 0.4174, "step": 5620 }, { "epoch": 2.3674013758247927, "grad_norm": 0.38667577505111694, "learning_rate": 1.2934189574566975e-06, "loss": 0.3558, "step": 5621 }, { "epoch": 2.3678225466797698, "grad_norm": 0.4066520929336548, "learning_rate": 1.2917744515680368e-06, "loss": 0.4271, "step": 5622 }, { "epoch": 2.368243717534747, "grad_norm": 0.40639352798461914, "learning_rate": 1.2901308366950337e-06, "loss": 0.3486, "step": 5623 }, { "epoch": 2.3686648883897234, "grad_norm": 0.39829063415527344, "learning_rate": 1.2884881132326166e-06, "loss": 0.3734, "step": 5624 }, { "epoch": 2.3690860592447, "grad_norm": 0.4402541518211365, "learning_rate": 1.2868462815754985e-06, "loss": 0.4188, "step": 5625 }, { "epoch": 2.369507230099677, "grad_norm": 0.3997698426246643, "learning_rate": 1.2852053421181826e-06, "loss": 0.3689, "step": 5626 }, { "epoch": 2.369928400954654, "grad_norm": 0.4169977307319641, "learning_rate": 1.2835652952549538e-06, "loss": 0.391, "step": 5627 }, { "epoch": 2.370349571809631, "grad_norm": 0.4114128053188324, "learning_rate": 1.281926141379884e-06, "loss": 0.3735, "step": 5628 }, { "epoch": 2.3707707426646074, "grad_norm": 0.4383161664009094, "learning_rate": 1.280287880886834e-06, "loss": 0.4328, "step": 5629 }, { "epoch": 2.3711919135195845, "grad_norm": 0.44501203298568726, "learning_rate": 1.278650514169441e-06, "loss": 0.4198, "step": 5630 }, { "epoch": 2.371613084374561, "grad_norm": 0.45732173323631287, "learning_rate": 1.277014041621137e-06, "loss": 0.3985, "step": 5631 }, { "epoch": 2.372034255229538, "grad_norm": 0.40864160656929016, "learning_rate": 1.2753784636351314e-06, "loss": 0.3846, "step": 5632 }, { "epoch": 2.3724554260845148, "grad_norm": 0.42751166224479675, "learning_rate": 1.2737437806044245e-06, "loss": 0.4264, "step": 5633 }, { "epoch": 2.372876596939492, "grad_norm": 0.4620552062988281, "learning_rate": 1.2721099929218023e-06, "loss": 0.4039, "step": 5634 }, { "epoch": 2.3732977677944684, "grad_norm": 0.4668242037296295, "learning_rate": 1.2704771009798266e-06, "loss": 0.411, "step": 5635 }, { "epoch": 2.3737189386494455, "grad_norm": 0.4463803768157959, "learning_rate": 1.2688451051708534e-06, "loss": 0.3816, "step": 5636 }, { "epoch": 2.374140109504422, "grad_norm": 0.4228694438934326, "learning_rate": 1.2672140058870214e-06, "loss": 0.4037, "step": 5637 }, { "epoch": 2.374561280359399, "grad_norm": 0.40759706497192383, "learning_rate": 1.2655838035202496e-06, "loss": 0.4307, "step": 5638 }, { "epoch": 2.374982451214376, "grad_norm": 0.3832850754261017, "learning_rate": 1.2639544984622464e-06, "loss": 0.3823, "step": 5639 }, { "epoch": 2.375403622069353, "grad_norm": 0.45647677779197693, "learning_rate": 1.2623260911045032e-06, "loss": 0.4191, "step": 5640 }, { "epoch": 2.3758247929243295, "grad_norm": 0.4578202962875366, "learning_rate": 1.2606985818382917e-06, "loss": 0.4652, "step": 5641 }, { "epoch": 2.3762459637793065, "grad_norm": 0.40592989325523376, "learning_rate": 1.2590719710546756e-06, "loss": 0.3655, "step": 5642 }, { "epoch": 2.376667134634283, "grad_norm": 0.47635817527770996, "learning_rate": 1.257446259144494e-06, "loss": 0.4505, "step": 5643 }, { "epoch": 2.37708830548926, "grad_norm": 0.36531010270118713, "learning_rate": 1.2558214464983791e-06, "loss": 0.3625, "step": 5644 }, { "epoch": 2.377509476344237, "grad_norm": 0.4074103832244873, "learning_rate": 1.2541975335067392e-06, "loss": 0.437, "step": 5645 }, { "epoch": 2.377930647199214, "grad_norm": 0.4142265021800995, "learning_rate": 1.252574520559769e-06, "loss": 0.3741, "step": 5646 }, { "epoch": 2.3783518180541905, "grad_norm": 0.44454169273376465, "learning_rate": 1.25095240804745e-06, "loss": 0.4196, "step": 5647 }, { "epoch": 2.3787729889091676, "grad_norm": 0.3850487768650055, "learning_rate": 1.2493311963595417e-06, "loss": 0.4048, "step": 5648 }, { "epoch": 2.379194159764144, "grad_norm": 0.4339967668056488, "learning_rate": 1.247710885885594e-06, "loss": 0.3692, "step": 5649 }, { "epoch": 2.3796153306191212, "grad_norm": 0.39835652709007263, "learning_rate": 1.246091477014934e-06, "loss": 0.4076, "step": 5650 }, { "epoch": 2.380036501474098, "grad_norm": 0.4321899116039276, "learning_rate": 1.2444729701366742e-06, "loss": 0.4258, "step": 5651 }, { "epoch": 2.380457672329075, "grad_norm": 0.41538339853286743, "learning_rate": 1.2428553656397112e-06, "loss": 0.4262, "step": 5652 }, { "epoch": 2.3808788431840515, "grad_norm": 0.4142656922340393, "learning_rate": 1.2412386639127272e-06, "loss": 0.3759, "step": 5653 }, { "epoch": 2.3813000140390286, "grad_norm": 0.4400211572647095, "learning_rate": 1.239622865344182e-06, "loss": 0.4245, "step": 5654 }, { "epoch": 2.381721184894005, "grad_norm": 0.39113789796829224, "learning_rate": 1.2380079703223201e-06, "loss": 0.3785, "step": 5655 }, { "epoch": 2.3821423557489823, "grad_norm": 0.42455247044563293, "learning_rate": 1.2363939792351732e-06, "loss": 0.4407, "step": 5656 }, { "epoch": 2.382563526603959, "grad_norm": 0.35107260942459106, "learning_rate": 1.234780892470549e-06, "loss": 0.3812, "step": 5657 }, { "epoch": 2.382984697458936, "grad_norm": 0.4298626482486725, "learning_rate": 1.2331687104160445e-06, "loss": 0.4517, "step": 5658 }, { "epoch": 2.3834058683139125, "grad_norm": 0.4052506387233734, "learning_rate": 1.2315574334590347e-06, "loss": 0.3952, "step": 5659 }, { "epoch": 2.3838270391688896, "grad_norm": 0.39985716342926025, "learning_rate": 1.2299470619866778e-06, "loss": 0.4018, "step": 5660 }, { "epoch": 2.384248210023866, "grad_norm": 0.39212384819984436, "learning_rate": 1.2283375963859169e-06, "loss": 0.3538, "step": 5661 }, { "epoch": 2.3846693808788433, "grad_norm": 0.42626136541366577, "learning_rate": 1.2267290370434738e-06, "loss": 0.4299, "step": 5662 }, { "epoch": 2.38509055173382, "grad_norm": 0.40196850895881653, "learning_rate": 1.225121384345857e-06, "loss": 0.3862, "step": 5663 }, { "epoch": 2.385511722588797, "grad_norm": 0.4045345187187195, "learning_rate": 1.2235146386793534e-06, "loss": 0.4255, "step": 5664 }, { "epoch": 2.3859328934437736, "grad_norm": 0.36634406447410583, "learning_rate": 1.2219088004300323e-06, "loss": 0.3995, "step": 5665 }, { "epoch": 2.3863540642987506, "grad_norm": 0.44755250215530396, "learning_rate": 1.2203038699837482e-06, "loss": 0.3971, "step": 5666 }, { "epoch": 2.3867752351537272, "grad_norm": 0.43516629934310913, "learning_rate": 1.2186998477261324e-06, "loss": 0.377, "step": 5667 }, { "epoch": 2.3871964060087043, "grad_norm": 0.42825737595558167, "learning_rate": 1.2170967340426021e-06, "loss": 0.4156, "step": 5668 }, { "epoch": 2.387617576863681, "grad_norm": 0.3807564675807953, "learning_rate": 1.2154945293183584e-06, "loss": 0.3694, "step": 5669 }, { "epoch": 2.388038747718658, "grad_norm": 0.41654667258262634, "learning_rate": 1.2138932339383736e-06, "loss": 0.3887, "step": 5670 }, { "epoch": 2.3884599185736346, "grad_norm": 0.4741080403327942, "learning_rate": 1.212292848287412e-06, "loss": 0.4619, "step": 5671 }, { "epoch": 2.3888810894286117, "grad_norm": 0.45752498507499695, "learning_rate": 1.210693372750017e-06, "loss": 0.3485, "step": 5672 }, { "epoch": 2.3893022602835883, "grad_norm": 0.3876703381538391, "learning_rate": 1.2090948077105085e-06, "loss": 0.3739, "step": 5673 }, { "epoch": 2.3897234311385653, "grad_norm": 0.3811621367931366, "learning_rate": 1.207497153552995e-06, "loss": 0.4127, "step": 5674 }, { "epoch": 2.390144601993542, "grad_norm": 0.4162207841873169, "learning_rate": 1.2059004106613597e-06, "loss": 0.4088, "step": 5675 }, { "epoch": 2.390565772848519, "grad_norm": 0.414224773645401, "learning_rate": 1.2043045794192693e-06, "loss": 0.4053, "step": 5676 }, { "epoch": 2.3909869437034956, "grad_norm": 0.39741212129592896, "learning_rate": 1.2027096602101728e-06, "loss": 0.4085, "step": 5677 }, { "epoch": 2.3914081145584727, "grad_norm": 0.398966521024704, "learning_rate": 1.2011156534172969e-06, "loss": 0.3529, "step": 5678 }, { "epoch": 2.3918292854134493, "grad_norm": 0.40001043677330017, "learning_rate": 1.1995225594236538e-06, "loss": 0.3748, "step": 5679 }, { "epoch": 2.3922504562684264, "grad_norm": 0.4653705060482025, "learning_rate": 1.1979303786120316e-06, "loss": 0.4072, "step": 5680 }, { "epoch": 2.392671627123403, "grad_norm": 0.44559335708618164, "learning_rate": 1.1963391113649996e-06, "loss": 0.4197, "step": 5681 }, { "epoch": 2.39309279797838, "grad_norm": 0.4253147840499878, "learning_rate": 1.194748758064912e-06, "loss": 0.404, "step": 5682 }, { "epoch": 2.3935139688333567, "grad_norm": 0.3767615258693695, "learning_rate": 1.1931593190938972e-06, "loss": 0.3787, "step": 5683 }, { "epoch": 2.3939351396883337, "grad_norm": 0.42835932970046997, "learning_rate": 1.1915707948338702e-06, "loss": 0.4074, "step": 5684 }, { "epoch": 2.3943563105433103, "grad_norm": 0.39345675706863403, "learning_rate": 1.1899831856665206e-06, "loss": 0.3958, "step": 5685 }, { "epoch": 2.3947774813982874, "grad_norm": 0.4084545969963074, "learning_rate": 1.1883964919733205e-06, "loss": 0.4427, "step": 5686 }, { "epoch": 2.395198652253264, "grad_norm": 0.39035144448280334, "learning_rate": 1.1868107141355222e-06, "loss": 0.3467, "step": 5687 }, { "epoch": 2.395619823108241, "grad_norm": 0.4465045928955078, "learning_rate": 1.1852258525341598e-06, "loss": 0.3956, "step": 5688 }, { "epoch": 2.3960409939632177, "grad_norm": 0.45360472798347473, "learning_rate": 1.1836419075500438e-06, "loss": 0.4434, "step": 5689 }, { "epoch": 2.3964621648181947, "grad_norm": 0.4231898784637451, "learning_rate": 1.1820588795637655e-06, "loss": 0.3909, "step": 5690 }, { "epoch": 2.3968833356731714, "grad_norm": 0.4350234568119049, "learning_rate": 1.1804767689556952e-06, "loss": 0.418, "step": 5691 }, { "epoch": 2.3973045065281484, "grad_norm": 0.4073276221752167, "learning_rate": 1.178895576105985e-06, "loss": 0.4144, "step": 5692 }, { "epoch": 2.397725677383125, "grad_norm": 0.41681790351867676, "learning_rate": 1.177315301394567e-06, "loss": 0.4282, "step": 5693 }, { "epoch": 2.398146848238102, "grad_norm": 0.42281079292297363, "learning_rate": 1.1757359452011497e-06, "loss": 0.4326, "step": 5694 }, { "epoch": 2.3985680190930787, "grad_norm": 0.41887250542640686, "learning_rate": 1.1741575079052203e-06, "loss": 0.4424, "step": 5695 }, { "epoch": 2.3989891899480558, "grad_norm": 0.39291927218437195, "learning_rate": 1.1725799898860496e-06, "loss": 0.4004, "step": 5696 }, { "epoch": 2.3994103608030324, "grad_norm": 0.40710973739624023, "learning_rate": 1.171003391522683e-06, "loss": 0.3764, "step": 5697 }, { "epoch": 2.3998315316580094, "grad_norm": 0.4264136552810669, "learning_rate": 1.1694277131939496e-06, "loss": 0.4245, "step": 5698 }, { "epoch": 2.400252702512986, "grad_norm": 0.4238344430923462, "learning_rate": 1.167852955278453e-06, "loss": 0.4685, "step": 5699 }, { "epoch": 2.400673873367963, "grad_norm": 0.4479101002216339, "learning_rate": 1.1662791181545763e-06, "loss": 0.4388, "step": 5700 }, { "epoch": 2.4010950442229397, "grad_norm": 0.41401728987693787, "learning_rate": 1.1647062022004845e-06, "loss": 0.4041, "step": 5701 }, { "epoch": 2.401516215077917, "grad_norm": 0.41603899002075195, "learning_rate": 1.1631342077941178e-06, "loss": 0.3892, "step": 5702 }, { "epoch": 2.4019373859328934, "grad_norm": 0.3909715712070465, "learning_rate": 1.1615631353131979e-06, "loss": 0.3768, "step": 5703 }, { "epoch": 2.4023585567878705, "grad_norm": 0.41162556409835815, "learning_rate": 1.1599929851352226e-06, "loss": 0.4091, "step": 5704 }, { "epoch": 2.402779727642847, "grad_norm": 0.38691428303718567, "learning_rate": 1.1584237576374674e-06, "loss": 0.4023, "step": 5705 }, { "epoch": 2.4032008984978237, "grad_norm": 0.40675777196884155, "learning_rate": 1.1568554531969906e-06, "loss": 0.3968, "step": 5706 }, { "epoch": 2.4036220693528008, "grad_norm": 0.3938625156879425, "learning_rate": 1.155288072190623e-06, "loss": 0.392, "step": 5707 }, { "epoch": 2.404043240207778, "grad_norm": 0.41054508090019226, "learning_rate": 1.1537216149949786e-06, "loss": 0.4301, "step": 5708 }, { "epoch": 2.4044644110627544, "grad_norm": 0.39341112971305847, "learning_rate": 1.152156081986447e-06, "loss": 0.4072, "step": 5709 }, { "epoch": 2.404885581917731, "grad_norm": 0.4576069414615631, "learning_rate": 1.1505914735411928e-06, "loss": 0.4359, "step": 5710 }, { "epoch": 2.405306752772708, "grad_norm": 0.4356628358364105, "learning_rate": 1.1490277900351637e-06, "loss": 0.4428, "step": 5711 }, { "epoch": 2.405727923627685, "grad_norm": 0.43041396141052246, "learning_rate": 1.147465031844084e-06, "loss": 0.391, "step": 5712 }, { "epoch": 2.406149094482662, "grad_norm": 0.36728939414024353, "learning_rate": 1.1459031993434532e-06, "loss": 0.3555, "step": 5713 }, { "epoch": 2.4065702653376384, "grad_norm": 0.4170437753200531, "learning_rate": 1.144342292908549e-06, "loss": 0.4272, "step": 5714 }, { "epoch": 2.4069914361926155, "grad_norm": 0.4234541654586792, "learning_rate": 1.14278231291443e-06, "loss": 0.4089, "step": 5715 }, { "epoch": 2.4074126070475925, "grad_norm": 0.4205425977706909, "learning_rate": 1.1412232597359257e-06, "loss": 0.3957, "step": 5716 }, { "epoch": 2.407833777902569, "grad_norm": 0.4542914628982544, "learning_rate": 1.1396651337476511e-06, "loss": 0.4734, "step": 5717 }, { "epoch": 2.4082549487575458, "grad_norm": 0.41224202513694763, "learning_rate": 1.1381079353239916e-06, "loss": 0.3779, "step": 5718 }, { "epoch": 2.408676119612523, "grad_norm": 0.44128942489624023, "learning_rate": 1.1365516648391111e-06, "loss": 0.4491, "step": 5719 }, { "epoch": 2.4090972904675, "grad_norm": 0.43354684114456177, "learning_rate": 1.134996322666954e-06, "loss": 0.3531, "step": 5720 }, { "epoch": 2.4095184613224765, "grad_norm": 0.4139535427093506, "learning_rate": 1.133441909181237e-06, "loss": 0.4497, "step": 5721 }, { "epoch": 2.409939632177453, "grad_norm": 0.4483187198638916, "learning_rate": 1.131888424755459e-06, "loss": 0.4234, "step": 5722 }, { "epoch": 2.41036080303243, "grad_norm": 0.37768444418907166, "learning_rate": 1.1303358697628908e-06, "loss": 0.3463, "step": 5723 }, { "epoch": 2.4107819738874072, "grad_norm": 0.447391539812088, "learning_rate": 1.1287842445765801e-06, "loss": 0.4222, "step": 5724 }, { "epoch": 2.411203144742384, "grad_norm": 0.3947901725769043, "learning_rate": 1.1272335495693555e-06, "loss": 0.3668, "step": 5725 }, { "epoch": 2.4116243155973605, "grad_norm": 0.4308638572692871, "learning_rate": 1.125683785113817e-06, "loss": 0.3962, "step": 5726 }, { "epoch": 2.4120454864523375, "grad_norm": 0.45566385984420776, "learning_rate": 1.124134951582344e-06, "loss": 0.4061, "step": 5727 }, { "epoch": 2.4124666573073146, "grad_norm": 0.46305328607559204, "learning_rate": 1.1225870493470952e-06, "loss": 0.4625, "step": 5728 }, { "epoch": 2.412887828162291, "grad_norm": 0.37258997559547424, "learning_rate": 1.1210400787799959e-06, "loss": 0.338, "step": 5729 }, { "epoch": 2.413308999017268, "grad_norm": 0.40758082270622253, "learning_rate": 1.1194940402527566e-06, "loss": 0.4612, "step": 5730 }, { "epoch": 2.413730169872245, "grad_norm": 0.3873364329338074, "learning_rate": 1.1179489341368617e-06, "loss": 0.3625, "step": 5731 }, { "epoch": 2.4141513407272215, "grad_norm": 0.46207883954048157, "learning_rate": 1.1164047608035678e-06, "loss": 0.4525, "step": 5732 }, { "epoch": 2.4145725115821985, "grad_norm": 0.406162291765213, "learning_rate": 1.1148615206239137e-06, "loss": 0.3859, "step": 5733 }, { "epoch": 2.414993682437175, "grad_norm": 0.42396724224090576, "learning_rate": 1.1133192139687087e-06, "loss": 0.4131, "step": 5734 }, { "epoch": 2.415414853292152, "grad_norm": 0.40537071228027344, "learning_rate": 1.1117778412085383e-06, "loss": 0.4137, "step": 5735 }, { "epoch": 2.415836024147129, "grad_norm": 0.3962092995643616, "learning_rate": 1.1102374027137675e-06, "loss": 0.3663, "step": 5736 }, { "epoch": 2.416257195002106, "grad_norm": 0.4075097441673279, "learning_rate": 1.108697898854531e-06, "loss": 0.3831, "step": 5737 }, { "epoch": 2.4166783658570825, "grad_norm": 0.446980357170105, "learning_rate": 1.107159330000746e-06, "loss": 0.4487, "step": 5738 }, { "epoch": 2.4170995367120596, "grad_norm": 0.3852386176586151, "learning_rate": 1.105621696522099e-06, "loss": 0.3742, "step": 5739 }, { "epoch": 2.417520707567036, "grad_norm": 0.3891226053237915, "learning_rate": 1.1040849987880525e-06, "loss": 0.3671, "step": 5740 }, { "epoch": 2.4179418784220132, "grad_norm": 0.404276579618454, "learning_rate": 1.1025492371678487e-06, "loss": 0.4272, "step": 5741 }, { "epoch": 2.41836304927699, "grad_norm": 0.41622665524482727, "learning_rate": 1.1010144120304984e-06, "loss": 0.3753, "step": 5742 }, { "epoch": 2.418784220131967, "grad_norm": 0.3589346706867218, "learning_rate": 1.0994805237447942e-06, "loss": 0.3825, "step": 5743 }, { "epoch": 2.4192053909869435, "grad_norm": 0.38167548179626465, "learning_rate": 1.0979475726792983e-06, "loss": 0.4187, "step": 5744 }, { "epoch": 2.4196265618419206, "grad_norm": 0.4481537938117981, "learning_rate": 1.0964155592023483e-06, "loss": 0.4242, "step": 5745 }, { "epoch": 2.420047732696897, "grad_norm": 0.46058908104896545, "learning_rate": 1.0948844836820587e-06, "loss": 0.4494, "step": 5746 }, { "epoch": 2.4204689035518743, "grad_norm": 0.38712936639785767, "learning_rate": 1.09335434648632e-06, "loss": 0.368, "step": 5747 }, { "epoch": 2.420890074406851, "grad_norm": 0.40365591645240784, "learning_rate": 1.0918251479827923e-06, "loss": 0.3979, "step": 5748 }, { "epoch": 2.421311245261828, "grad_norm": 0.48081693053245544, "learning_rate": 1.0902968885389136e-06, "loss": 0.4306, "step": 5749 }, { "epoch": 2.4217324161168046, "grad_norm": 0.514393150806427, "learning_rate": 1.088769568521894e-06, "loss": 0.4501, "step": 5750 }, { "epoch": 2.4221535869717816, "grad_norm": 0.4134466052055359, "learning_rate": 1.08724318829872e-06, "loss": 0.3851, "step": 5751 }, { "epoch": 2.4225747578267582, "grad_norm": 0.44741949439048767, "learning_rate": 1.0857177482361537e-06, "loss": 0.4018, "step": 5752 }, { "epoch": 2.4229959286817353, "grad_norm": 0.4174899160861969, "learning_rate": 1.0841932487007273e-06, "loss": 0.445, "step": 5753 }, { "epoch": 2.423417099536712, "grad_norm": 0.40113314986228943, "learning_rate": 1.0826696900587474e-06, "loss": 0.404, "step": 5754 }, { "epoch": 2.423838270391689, "grad_norm": 0.37526777386665344, "learning_rate": 1.081147072676299e-06, "loss": 0.3478, "step": 5755 }, { "epoch": 2.4242594412466656, "grad_norm": 0.4592922329902649, "learning_rate": 1.0796253969192355e-06, "loss": 0.3877, "step": 5756 }, { "epoch": 2.4246806121016427, "grad_norm": 0.4522237181663513, "learning_rate": 1.0781046631531888e-06, "loss": 0.41, "step": 5757 }, { "epoch": 2.4251017829566193, "grad_norm": 0.4323723614215851, "learning_rate": 1.0765848717435607e-06, "loss": 0.3998, "step": 5758 }, { "epoch": 2.4255229538115963, "grad_norm": 0.40166959166526794, "learning_rate": 1.075066023055527e-06, "loss": 0.384, "step": 5759 }, { "epoch": 2.425944124666573, "grad_norm": 0.40090617537498474, "learning_rate": 1.0735481174540403e-06, "loss": 0.3973, "step": 5760 }, { "epoch": 2.42636529552155, "grad_norm": 0.36570021510124207, "learning_rate": 1.0720311553038221e-06, "loss": 0.361, "step": 5761 }, { "epoch": 2.4267864663765266, "grad_norm": 0.49661538004875183, "learning_rate": 1.0705151369693712e-06, "loss": 0.4671, "step": 5762 }, { "epoch": 2.4272076372315037, "grad_norm": 0.4694691002368927, "learning_rate": 1.0690000628149571e-06, "loss": 0.4231, "step": 5763 }, { "epoch": 2.4276288080864803, "grad_norm": 0.4166810214519501, "learning_rate": 1.0674859332046216e-06, "loss": 0.4279, "step": 5764 }, { "epoch": 2.4280499789414574, "grad_norm": 0.4111526608467102, "learning_rate": 1.0659727485021843e-06, "loss": 0.4093, "step": 5765 }, { "epoch": 2.428471149796434, "grad_norm": 0.3974241614341736, "learning_rate": 1.0644605090712312e-06, "loss": 0.4214, "step": 5766 }, { "epoch": 2.428892320651411, "grad_norm": 0.39432817697525024, "learning_rate": 1.0629492152751258e-06, "loss": 0.3614, "step": 5767 }, { "epoch": 2.4293134915063876, "grad_norm": 0.4457697868347168, "learning_rate": 1.0614388674770066e-06, "loss": 0.4123, "step": 5768 }, { "epoch": 2.4297346623613647, "grad_norm": 0.3947513699531555, "learning_rate": 1.0599294660397747e-06, "loss": 0.467, "step": 5769 }, { "epoch": 2.4301558332163413, "grad_norm": 0.454258531332016, "learning_rate": 1.058421011326114e-06, "loss": 0.4546, "step": 5770 }, { "epoch": 2.4305770040713184, "grad_norm": 0.43712642788887024, "learning_rate": 1.0569135036984784e-06, "loss": 0.3941, "step": 5771 }, { "epoch": 2.430998174926295, "grad_norm": 0.4454747140407562, "learning_rate": 1.05540694351909e-06, "loss": 0.4064, "step": 5772 }, { "epoch": 2.431419345781272, "grad_norm": 0.36660030484199524, "learning_rate": 1.0539013311499503e-06, "loss": 0.3895, "step": 5773 }, { "epoch": 2.4318405166362487, "grad_norm": 0.38725271821022034, "learning_rate": 1.0523966669528264e-06, "loss": 0.3801, "step": 5774 }, { "epoch": 2.4322616874912257, "grad_norm": 0.39827510714530945, "learning_rate": 1.0508929512892591e-06, "loss": 0.3566, "step": 5775 }, { "epoch": 2.4326828583462023, "grad_norm": 0.45089414715766907, "learning_rate": 1.0493901845205662e-06, "loss": 0.4217, "step": 5776 }, { "epoch": 2.4331040292011794, "grad_norm": 0.4229193329811096, "learning_rate": 1.0478883670078304e-06, "loss": 0.4359, "step": 5777 }, { "epoch": 2.433525200056156, "grad_norm": 0.416728138923645, "learning_rate": 1.0463874991119133e-06, "loss": 0.4132, "step": 5778 }, { "epoch": 2.433946370911133, "grad_norm": 0.4322958290576935, "learning_rate": 1.0448875811934417e-06, "loss": 0.3784, "step": 5779 }, { "epoch": 2.4343675417661097, "grad_norm": 0.43524429202079773, "learning_rate": 1.0433886136128175e-06, "loss": 0.3651, "step": 5780 }, { "epoch": 2.4347887126210868, "grad_norm": 0.4510461688041687, "learning_rate": 1.0418905967302156e-06, "loss": 0.4626, "step": 5781 }, { "epoch": 2.4352098834760634, "grad_norm": 0.46063700318336487, "learning_rate": 1.0403935309055784e-06, "loss": 0.4358, "step": 5782 }, { "epoch": 2.4356310543310404, "grad_norm": 0.39364010095596313, "learning_rate": 1.0388974164986249e-06, "loss": 0.357, "step": 5783 }, { "epoch": 2.436052225186017, "grad_norm": 0.40722107887268066, "learning_rate": 1.0374022538688415e-06, "loss": 0.4885, "step": 5784 }, { "epoch": 2.436473396040994, "grad_norm": 0.3909861445426941, "learning_rate": 1.0359080433754859e-06, "loss": 0.3701, "step": 5785 }, { "epoch": 2.4368945668959707, "grad_norm": 0.4047218859195709, "learning_rate": 1.0344147853775892e-06, "loss": 0.3942, "step": 5786 }, { "epoch": 2.437315737750948, "grad_norm": 0.421289324760437, "learning_rate": 1.0329224802339549e-06, "loss": 0.3968, "step": 5787 }, { "epoch": 2.4377369086059244, "grad_norm": 0.3777852952480316, "learning_rate": 1.0314311283031531e-06, "loss": 0.354, "step": 5788 }, { "epoch": 2.4381580794609015, "grad_norm": 0.4077357351779938, "learning_rate": 1.029940729943526e-06, "loss": 0.4204, "step": 5789 }, { "epoch": 2.438579250315878, "grad_norm": 0.41761791706085205, "learning_rate": 1.0284512855131912e-06, "loss": 0.4113, "step": 5790 }, { "epoch": 2.439000421170855, "grad_norm": 0.4057803452014923, "learning_rate": 1.02696279537003e-06, "loss": 0.4211, "step": 5791 }, { "epoch": 2.4394215920258318, "grad_norm": 0.37190958857536316, "learning_rate": 1.0254752598717016e-06, "loss": 0.3667, "step": 5792 }, { "epoch": 2.439842762880809, "grad_norm": 0.43132656812667847, "learning_rate": 1.023988679375631e-06, "loss": 0.4472, "step": 5793 }, { "epoch": 2.4402639337357854, "grad_norm": 0.46058887243270874, "learning_rate": 1.022503054239013e-06, "loss": 0.3937, "step": 5794 }, { "epoch": 2.4406851045907625, "grad_norm": 0.4443357288837433, "learning_rate": 1.0210183848188183e-06, "loss": 0.4528, "step": 5795 }, { "epoch": 2.441106275445739, "grad_norm": 0.40210458636283875, "learning_rate": 1.0195346714717813e-06, "loss": 0.3583, "step": 5796 }, { "epoch": 2.441527446300716, "grad_norm": 0.4387394189834595, "learning_rate": 1.0180519145544126e-06, "loss": 0.4597, "step": 5797 }, { "epoch": 2.441948617155693, "grad_norm": 0.4268527030944824, "learning_rate": 1.0165701144229896e-06, "loss": 0.3611, "step": 5798 }, { "epoch": 2.44236978801067, "grad_norm": 0.48414817452430725, "learning_rate": 1.0150892714335587e-06, "loss": 0.4785, "step": 5799 }, { "epoch": 2.4427909588656465, "grad_norm": 0.43119099736213684, "learning_rate": 1.013609385941941e-06, "loss": 0.4115, "step": 5800 }, { "epoch": 2.4432121297206235, "grad_norm": 0.3670777678489685, "learning_rate": 1.0121304583037223e-06, "loss": 0.3674, "step": 5801 }, { "epoch": 2.4436333005756, "grad_norm": 0.40753671526908875, "learning_rate": 1.0106524888742614e-06, "loss": 0.3528, "step": 5802 }, { "epoch": 2.444054471430577, "grad_norm": 0.4509790241718292, "learning_rate": 1.0091754780086889e-06, "loss": 0.4415, "step": 5803 }, { "epoch": 2.444475642285554, "grad_norm": 0.4349087178707123, "learning_rate": 1.0076994260618967e-06, "loss": 0.4654, "step": 5804 }, { "epoch": 2.444896813140531, "grad_norm": 0.41384363174438477, "learning_rate": 1.0062243333885553e-06, "loss": 0.3705, "step": 5805 }, { "epoch": 2.4453179839955075, "grad_norm": 0.44397854804992676, "learning_rate": 1.0047502003431014e-06, "loss": 0.4093, "step": 5806 }, { "epoch": 2.445739154850484, "grad_norm": 0.40123406052589417, "learning_rate": 1.0032770272797394e-06, "loss": 0.4268, "step": 5807 }, { "epoch": 2.446160325705461, "grad_norm": 0.3781977593898773, "learning_rate": 1.0018048145524478e-06, "loss": 0.3893, "step": 5808 }, { "epoch": 2.446581496560438, "grad_norm": 0.41120535135269165, "learning_rate": 1.0003335625149668e-06, "loss": 0.3775, "step": 5809 }, { "epoch": 2.447002667415415, "grad_norm": 0.41344255208969116, "learning_rate": 9.98863271520812e-07, "loss": 0.4449, "step": 5810 }, { "epoch": 2.4474238382703914, "grad_norm": 0.39180949330329895, "learning_rate": 9.97393941923267e-07, "loss": 0.3744, "step": 5811 }, { "epoch": 2.4478450091253685, "grad_norm": 0.46842050552368164, "learning_rate": 9.959255740753837e-07, "loss": 0.4272, "step": 5812 }, { "epoch": 2.4482661799803456, "grad_norm": 0.41536745429039, "learning_rate": 9.944581683299804e-07, "loss": 0.3663, "step": 5813 }, { "epoch": 2.448687350835322, "grad_norm": 0.4023706614971161, "learning_rate": 9.929917250396498e-07, "loss": 0.4014, "step": 5814 }, { "epoch": 2.449108521690299, "grad_norm": 0.4076787531375885, "learning_rate": 9.915262445567474e-07, "loss": 0.3708, "step": 5815 }, { "epoch": 2.449529692545276, "grad_norm": 0.38666051626205444, "learning_rate": 9.900617272334023e-07, "loss": 0.3973, "step": 5816 }, { "epoch": 2.449950863400253, "grad_norm": 0.43542081117630005, "learning_rate": 9.885981734215094e-07, "loss": 0.4028, "step": 5817 }, { "epoch": 2.4503720342552295, "grad_norm": 0.3865068852901459, "learning_rate": 9.871355834727315e-07, "loss": 0.4306, "step": 5818 }, { "epoch": 2.450793205110206, "grad_norm": 0.47667196393013, "learning_rate": 9.856739577385022e-07, "loss": 0.399, "step": 5819 }, { "epoch": 2.451214375965183, "grad_norm": 0.4939284324645996, "learning_rate": 9.842132965700214e-07, "loss": 0.4901, "step": 5820 }, { "epoch": 2.4516355468201603, "grad_norm": 0.4311612546443939, "learning_rate": 9.827536003182593e-07, "loss": 0.3697, "step": 5821 }, { "epoch": 2.452056717675137, "grad_norm": 0.4447091221809387, "learning_rate": 9.81294869333952e-07, "loss": 0.408, "step": 5822 }, { "epoch": 2.4524778885301135, "grad_norm": 0.3874862790107727, "learning_rate": 9.798371039676036e-07, "loss": 0.4068, "step": 5823 }, { "epoch": 2.4528990593850906, "grad_norm": 0.4320547580718994, "learning_rate": 9.7838030456949e-07, "loss": 0.4004, "step": 5824 }, { "epoch": 2.4533202302400676, "grad_norm": 0.37092915177345276, "learning_rate": 9.769244714896493e-07, "loss": 0.4033, "step": 5825 }, { "epoch": 2.4537414010950442, "grad_norm": 0.3897397220134735, "learning_rate": 9.754696050778916e-07, "loss": 0.3935, "step": 5826 }, { "epoch": 2.454162571950021, "grad_norm": 0.41881752014160156, "learning_rate": 9.740157056837962e-07, "loss": 0.3958, "step": 5827 }, { "epoch": 2.454583742804998, "grad_norm": 0.44851699471473694, "learning_rate": 9.725627736567023e-07, "loss": 0.3887, "step": 5828 }, { "epoch": 2.455004913659975, "grad_norm": 0.41710084676742554, "learning_rate": 9.711108093457233e-07, "loss": 0.4251, "step": 5829 }, { "epoch": 2.4554260845149516, "grad_norm": 0.39151275157928467, "learning_rate": 9.696598130997415e-07, "loss": 0.4211, "step": 5830 }, { "epoch": 2.455847255369928, "grad_norm": 0.39026832580566406, "learning_rate": 9.682097852673993e-07, "loss": 0.3889, "step": 5831 }, { "epoch": 2.4562684262249053, "grad_norm": 0.436501681804657, "learning_rate": 9.66760726197114e-07, "loss": 0.4359, "step": 5832 }, { "epoch": 2.456689597079882, "grad_norm": 0.3644033670425415, "learning_rate": 9.653126362370652e-07, "loss": 0.3509, "step": 5833 }, { "epoch": 2.457110767934859, "grad_norm": 0.4503648281097412, "learning_rate": 9.638655157352006e-07, "loss": 0.4734, "step": 5834 }, { "epoch": 2.4575319387898356, "grad_norm": 0.3945215344429016, "learning_rate": 9.624193650392373e-07, "loss": 0.3683, "step": 5835 }, { "epoch": 2.4579531096448126, "grad_norm": 0.45565539598464966, "learning_rate": 9.609741844966552e-07, "loss": 0.4657, "step": 5836 }, { "epoch": 2.4583742804997892, "grad_norm": 0.39316192269325256, "learning_rate": 9.595299744547065e-07, "loss": 0.3715, "step": 5837 }, { "epoch": 2.4587954513547663, "grad_norm": 0.4284883737564087, "learning_rate": 9.580867352604057e-07, "loss": 0.3901, "step": 5838 }, { "epoch": 2.459216622209743, "grad_norm": 0.4012488126754761, "learning_rate": 9.566444672605341e-07, "loss": 0.3802, "step": 5839 }, { "epoch": 2.45963779306472, "grad_norm": 0.4513733386993408, "learning_rate": 9.55203170801644e-07, "loss": 0.4351, "step": 5840 }, { "epoch": 2.4600589639196966, "grad_norm": 0.4286956489086151, "learning_rate": 9.53762846230049e-07, "loss": 0.4268, "step": 5841 }, { "epoch": 2.4604801347746736, "grad_norm": 0.40224456787109375, "learning_rate": 9.523234938918335e-07, "loss": 0.3808, "step": 5842 }, { "epoch": 2.4609013056296503, "grad_norm": 0.3937649428844452, "learning_rate": 9.508851141328457e-07, "loss": 0.4037, "step": 5843 }, { "epoch": 2.4613224764846273, "grad_norm": 0.4166744649410248, "learning_rate": 9.494477072986991e-07, "loss": 0.4229, "step": 5844 }, { "epoch": 2.461743647339604, "grad_norm": 0.47504255175590515, "learning_rate": 9.480112737347763e-07, "loss": 0.4547, "step": 5845 }, { "epoch": 2.462164818194581, "grad_norm": 0.4659315347671509, "learning_rate": 9.465758137862264e-07, "loss": 0.4629, "step": 5846 }, { "epoch": 2.4625859890495576, "grad_norm": 0.42794671654701233, "learning_rate": 9.45141327797961e-07, "loss": 0.3859, "step": 5847 }, { "epoch": 2.4630071599045347, "grad_norm": 0.41983264684677124, "learning_rate": 9.43707816114659e-07, "loss": 0.4039, "step": 5848 }, { "epoch": 2.4634283307595113, "grad_norm": 0.42511260509490967, "learning_rate": 9.422752790807683e-07, "loss": 0.3643, "step": 5849 }, { "epoch": 2.4638495016144883, "grad_norm": 0.4610311985015869, "learning_rate": 9.408437170404971e-07, "loss": 0.5031, "step": 5850 }, { "epoch": 2.464270672469465, "grad_norm": 0.3996008038520813, "learning_rate": 9.394131303378257e-07, "loss": 0.3501, "step": 5851 }, { "epoch": 2.464691843324442, "grad_norm": 0.4150763154029846, "learning_rate": 9.379835193164949e-07, "loss": 0.3955, "step": 5852 }, { "epoch": 2.4651130141794186, "grad_norm": 0.4712223708629608, "learning_rate": 9.365548843200118e-07, "loss": 0.4885, "step": 5853 }, { "epoch": 2.4655341850343957, "grad_norm": 0.40333977341651917, "learning_rate": 9.351272256916522e-07, "loss": 0.3362, "step": 5854 }, { "epoch": 2.4659553558893723, "grad_norm": 0.4015653133392334, "learning_rate": 9.337005437744534e-07, "loss": 0.4139, "step": 5855 }, { "epoch": 2.4663765267443494, "grad_norm": 0.3965374529361725, "learning_rate": 9.322748389112213e-07, "loss": 0.3945, "step": 5856 }, { "epoch": 2.466797697599326, "grad_norm": 0.41286811232566833, "learning_rate": 9.308501114445257e-07, "loss": 0.4314, "step": 5857 }, { "epoch": 2.467218868454303, "grad_norm": 0.45561864972114563, "learning_rate": 9.294263617166981e-07, "loss": 0.4181, "step": 5858 }, { "epoch": 2.4676400393092797, "grad_norm": 0.34712639451026917, "learning_rate": 9.280035900698431e-07, "loss": 0.2907, "step": 5859 }, { "epoch": 2.4680612101642567, "grad_norm": 0.5119150876998901, "learning_rate": 9.265817968458207e-07, "loss": 0.4726, "step": 5860 }, { "epoch": 2.4684823810192333, "grad_norm": 0.34934303164482117, "learning_rate": 9.251609823862639e-07, "loss": 0.3824, "step": 5861 }, { "epoch": 2.4689035518742104, "grad_norm": 0.39783984422683716, "learning_rate": 9.237411470325685e-07, "loss": 0.3979, "step": 5862 }, { "epoch": 2.469324722729187, "grad_norm": 0.4171576201915741, "learning_rate": 9.223222911258889e-07, "loss": 0.4024, "step": 5863 }, { "epoch": 2.469745893584164, "grad_norm": 0.37299421429634094, "learning_rate": 9.209044150071522e-07, "loss": 0.3595, "step": 5864 }, { "epoch": 2.4701670644391407, "grad_norm": 0.4069836437702179, "learning_rate": 9.194875190170472e-07, "loss": 0.4026, "step": 5865 }, { "epoch": 2.4705882352941178, "grad_norm": 0.4228487014770508, "learning_rate": 9.180716034960252e-07, "loss": 0.3905, "step": 5866 }, { "epoch": 2.4710094061490944, "grad_norm": 0.3893607556819916, "learning_rate": 9.166566687843071e-07, "loss": 0.3745, "step": 5867 }, { "epoch": 2.4714305770040714, "grad_norm": 0.4175100326538086, "learning_rate": 9.152427152218696e-07, "loss": 0.4179, "step": 5868 }, { "epoch": 2.471851747859048, "grad_norm": 0.4486755132675171, "learning_rate": 9.138297431484611e-07, "loss": 0.342, "step": 5869 }, { "epoch": 2.472272918714025, "grad_norm": 0.5154278874397278, "learning_rate": 9.124177529035927e-07, "loss": 0.4692, "step": 5870 }, { "epoch": 2.4726940895690017, "grad_norm": 0.4548012912273407, "learning_rate": 9.110067448265369e-07, "loss": 0.432, "step": 5871 }, { "epoch": 2.473115260423979, "grad_norm": 0.4419166147708893, "learning_rate": 9.095967192563343e-07, "loss": 0.3989, "step": 5872 }, { "epoch": 2.4735364312789554, "grad_norm": 0.4258696436882019, "learning_rate": 9.081876765317849e-07, "loss": 0.4145, "step": 5873 }, { "epoch": 2.4739576021339325, "grad_norm": 0.43006783723831177, "learning_rate": 9.06779616991455e-07, "loss": 0.4105, "step": 5874 }, { "epoch": 2.474378772988909, "grad_norm": 0.41610655188560486, "learning_rate": 9.053725409736752e-07, "loss": 0.3695, "step": 5875 }, { "epoch": 2.474799943843886, "grad_norm": 0.5880569815635681, "learning_rate": 9.039664488165378e-07, "loss": 0.4518, "step": 5876 }, { "epoch": 2.4752211146988627, "grad_norm": 0.37983202934265137, "learning_rate": 9.025613408579021e-07, "loss": 0.3567, "step": 5877 }, { "epoch": 2.47564228555384, "grad_norm": 0.4179748594760895, "learning_rate": 9.011572174353867e-07, "loss": 0.4295, "step": 5878 }, { "epoch": 2.4760634564088164, "grad_norm": 0.4212133586406708, "learning_rate": 8.997540788863746e-07, "loss": 0.412, "step": 5879 }, { "epoch": 2.4764846272637935, "grad_norm": 0.4223991930484772, "learning_rate": 8.983519255480161e-07, "loss": 0.4316, "step": 5880 }, { "epoch": 2.47690579811877, "grad_norm": 0.4007820785045624, "learning_rate": 8.969507577572189e-07, "loss": 0.3848, "step": 5881 }, { "epoch": 2.477326968973747, "grad_norm": 0.38966116309165955, "learning_rate": 8.9555057585066e-07, "loss": 0.4011, "step": 5882 }, { "epoch": 2.4777481398287238, "grad_norm": 0.4066704213619232, "learning_rate": 8.941513801647739e-07, "loss": 0.4061, "step": 5883 }, { "epoch": 2.478169310683701, "grad_norm": 0.41323748230934143, "learning_rate": 8.9275317103576e-07, "loss": 0.3839, "step": 5884 }, { "epoch": 2.4785904815386774, "grad_norm": 0.4403276741504669, "learning_rate": 8.913559487995826e-07, "loss": 0.4001, "step": 5885 }, { "epoch": 2.4790116523936545, "grad_norm": 0.43850982189178467, "learning_rate": 8.899597137919685e-07, "loss": 0.4268, "step": 5886 }, { "epoch": 2.479432823248631, "grad_norm": 0.44629210233688354, "learning_rate": 8.88564466348405e-07, "loss": 0.3883, "step": 5887 }, { "epoch": 2.479853994103608, "grad_norm": 0.4489041864871979, "learning_rate": 8.871702068041421e-07, "loss": 0.4316, "step": 5888 }, { "epoch": 2.480275164958585, "grad_norm": 0.3703983426094055, "learning_rate": 8.857769354941964e-07, "loss": 0.3592, "step": 5889 }, { "epoch": 2.480696335813562, "grad_norm": 0.414571613073349, "learning_rate": 8.843846527533418e-07, "loss": 0.419, "step": 5890 }, { "epoch": 2.4811175066685385, "grad_norm": 0.42231571674346924, "learning_rate": 8.829933589161194e-07, "loss": 0.4085, "step": 5891 }, { "epoch": 2.4815386775235155, "grad_norm": 0.42484092712402344, "learning_rate": 8.816030543168291e-07, "loss": 0.4105, "step": 5892 }, { "epoch": 2.481959848378492, "grad_norm": 0.3981795608997345, "learning_rate": 8.802137392895338e-07, "loss": 0.3404, "step": 5893 }, { "epoch": 2.482381019233469, "grad_norm": 0.4167180061340332, "learning_rate": 8.788254141680618e-07, "loss": 0.4045, "step": 5894 }, { "epoch": 2.482802190088446, "grad_norm": 0.42626702785491943, "learning_rate": 8.774380792859983e-07, "loss": 0.457, "step": 5895 }, { "epoch": 2.483223360943423, "grad_norm": 0.41308653354644775, "learning_rate": 8.760517349766956e-07, "loss": 0.4034, "step": 5896 }, { "epoch": 2.4836445317983995, "grad_norm": 0.454199880361557, "learning_rate": 8.746663815732653e-07, "loss": 0.5105, "step": 5897 }, { "epoch": 2.4840657026533766, "grad_norm": 0.38687899708747864, "learning_rate": 8.732820194085794e-07, "loss": 0.3531, "step": 5898 }, { "epoch": 2.484486873508353, "grad_norm": 0.4050614833831787, "learning_rate": 8.718986488152758e-07, "loss": 0.3876, "step": 5899 }, { "epoch": 2.4849080443633302, "grad_norm": 0.4912281632423401, "learning_rate": 8.705162701257502e-07, "loss": 0.4091, "step": 5900 }, { "epoch": 2.485329215218307, "grad_norm": 0.43022096157073975, "learning_rate": 8.691348836721631e-07, "loss": 0.3412, "step": 5901 }, { "epoch": 2.485750386073284, "grad_norm": 0.42125654220581055, "learning_rate": 8.67754489786437e-07, "loss": 0.4333, "step": 5902 }, { "epoch": 2.4861715569282605, "grad_norm": 0.42735761404037476, "learning_rate": 8.663750888002498e-07, "loss": 0.4066, "step": 5903 }, { "epoch": 2.4865927277832376, "grad_norm": 0.37632930278778076, "learning_rate": 8.649966810450472e-07, "loss": 0.3537, "step": 5904 }, { "epoch": 2.487013898638214, "grad_norm": 0.4072282910346985, "learning_rate": 8.63619266852036e-07, "loss": 0.4397, "step": 5905 }, { "epoch": 2.4874350694931913, "grad_norm": 0.37605834007263184, "learning_rate": 8.622428465521793e-07, "loss": 0.352, "step": 5906 }, { "epoch": 2.487856240348168, "grad_norm": 0.45602133870124817, "learning_rate": 8.608674204762079e-07, "loss": 0.4161, "step": 5907 }, { "epoch": 2.4882774112031445, "grad_norm": 0.4039812386035919, "learning_rate": 8.594929889546083e-07, "loss": 0.3967, "step": 5908 }, { "epoch": 2.4886985820581216, "grad_norm": 0.4063108563423157, "learning_rate": 8.581195523176289e-07, "loss": 0.3866, "step": 5909 }, { "epoch": 2.4891197529130986, "grad_norm": 0.40170320868492126, "learning_rate": 8.567471108952835e-07, "loss": 0.3836, "step": 5910 }, { "epoch": 2.4895409237680752, "grad_norm": 0.44743582606315613, "learning_rate": 8.553756650173406e-07, "loss": 0.4789, "step": 5911 }, { "epoch": 2.489962094623052, "grad_norm": 0.39598870277404785, "learning_rate": 8.540052150133344e-07, "loss": 0.407, "step": 5912 }, { "epoch": 2.490383265478029, "grad_norm": 0.41455376148223877, "learning_rate": 8.526357612125574e-07, "loss": 0.3486, "step": 5913 }, { "epoch": 2.490804436333006, "grad_norm": 0.4482273459434509, "learning_rate": 8.512673039440616e-07, "loss": 0.4613, "step": 5914 }, { "epoch": 2.4912256071879826, "grad_norm": 0.3859825134277344, "learning_rate": 8.498998435366634e-07, "loss": 0.3885, "step": 5915 }, { "epoch": 2.491646778042959, "grad_norm": 0.42777132987976074, "learning_rate": 8.48533380318936e-07, "loss": 0.3937, "step": 5916 }, { "epoch": 2.4920679488979363, "grad_norm": 0.3877014219760895, "learning_rate": 8.471679146192136e-07, "loss": 0.387, "step": 5917 }, { "epoch": 2.4924891197529133, "grad_norm": 0.41525086760520935, "learning_rate": 8.45803446765594e-07, "loss": 0.4278, "step": 5918 }, { "epoch": 2.49291029060789, "grad_norm": 0.4186389446258545, "learning_rate": 8.444399770859296e-07, "loss": 0.4114, "step": 5919 }, { "epoch": 2.4933314614628665, "grad_norm": 0.4079332947731018, "learning_rate": 8.430775059078378e-07, "loss": 0.394, "step": 5920 }, { "epoch": 2.4937526323178436, "grad_norm": 0.4069908559322357, "learning_rate": 8.417160335586972e-07, "loss": 0.3836, "step": 5921 }, { "epoch": 2.4941738031728207, "grad_norm": 0.4176546037197113, "learning_rate": 8.403555603656377e-07, "loss": 0.4042, "step": 5922 }, { "epoch": 2.4945949740277973, "grad_norm": 0.4441285729408264, "learning_rate": 8.38996086655558e-07, "loss": 0.4042, "step": 5923 }, { "epoch": 2.495016144882774, "grad_norm": 0.4733518660068512, "learning_rate": 8.376376127551144e-07, "loss": 0.4286, "step": 5924 }, { "epoch": 2.495437315737751, "grad_norm": 0.41439488530158997, "learning_rate": 8.362801389907211e-07, "loss": 0.441, "step": 5925 }, { "epoch": 2.495858486592728, "grad_norm": 0.37924569845199585, "learning_rate": 8.349236656885546e-07, "loss": 0.3788, "step": 5926 }, { "epoch": 2.4962796574477046, "grad_norm": 0.4235139787197113, "learning_rate": 8.335681931745465e-07, "loss": 0.505, "step": 5927 }, { "epoch": 2.4967008283026813, "grad_norm": 0.40710628032684326, "learning_rate": 8.322137217743925e-07, "loss": 0.4189, "step": 5928 }, { "epoch": 2.4971219991576583, "grad_norm": 0.41613534092903137, "learning_rate": 8.308602518135473e-07, "loss": 0.3693, "step": 5929 }, { "epoch": 2.4975431700126354, "grad_norm": 0.4072856605052948, "learning_rate": 8.295077836172216e-07, "loss": 0.4143, "step": 5930 }, { "epoch": 2.497964340867612, "grad_norm": 0.4517822265625, "learning_rate": 8.281563175103896e-07, "loss": 0.4084, "step": 5931 }, { "epoch": 2.4983855117225886, "grad_norm": 0.4396267235279083, "learning_rate": 8.268058538177826e-07, "loss": 0.3779, "step": 5932 }, { "epoch": 2.4988066825775657, "grad_norm": 0.40055978298187256, "learning_rate": 8.254563928638892e-07, "loss": 0.4097, "step": 5933 }, { "epoch": 2.4992278534325423, "grad_norm": 0.46269887685775757, "learning_rate": 8.241079349729619e-07, "loss": 0.4897, "step": 5934 }, { "epoch": 2.4996490242875193, "grad_norm": 0.3777577877044678, "learning_rate": 8.227604804690065e-07, "loss": 0.3536, "step": 5935 }, { "epoch": 2.500070195142496, "grad_norm": 0.3850068151950836, "learning_rate": 8.214140296757928e-07, "loss": 0.4081, "step": 5936 }, { "epoch": 2.500491365997473, "grad_norm": 0.44356632232666016, "learning_rate": 8.200685829168464e-07, "loss": 0.4011, "step": 5937 }, { "epoch": 2.50091253685245, "grad_norm": 0.4192924499511719, "learning_rate": 8.187241405154511e-07, "loss": 0.4184, "step": 5938 }, { "epoch": 2.5013337077074267, "grad_norm": 0.42507612705230713, "learning_rate": 8.173807027946528e-07, "loss": 0.4247, "step": 5939 }, { "epoch": 2.5017548785624033, "grad_norm": 0.38902172446250916, "learning_rate": 8.160382700772524e-07, "loss": 0.3757, "step": 5940 }, { "epoch": 2.5021760494173804, "grad_norm": 0.37827926874160767, "learning_rate": 8.146968426858121e-07, "loss": 0.3713, "step": 5941 }, { "epoch": 2.5025972202723574, "grad_norm": 0.4323666989803314, "learning_rate": 8.133564209426504e-07, "loss": 0.4617, "step": 5942 }, { "epoch": 2.503018391127334, "grad_norm": 0.3813711106777191, "learning_rate": 8.12017005169844e-07, "loss": 0.3469, "step": 5943 }, { "epoch": 2.5034395619823107, "grad_norm": 0.3959009349346161, "learning_rate": 8.1067859568923e-07, "loss": 0.3765, "step": 5944 }, { "epoch": 2.5038607328372877, "grad_norm": 0.45232343673706055, "learning_rate": 8.093411928224037e-07, "loss": 0.4319, "step": 5945 }, { "epoch": 2.5042819036922643, "grad_norm": 0.41408422589302063, "learning_rate": 8.080047968907167e-07, "loss": 0.3827, "step": 5946 }, { "epoch": 2.5047030745472414, "grad_norm": 0.39368507266044617, "learning_rate": 8.066694082152776e-07, "loss": 0.4474, "step": 5947 }, { "epoch": 2.505124245402218, "grad_norm": 0.43475648760795593, "learning_rate": 8.053350271169569e-07, "loss": 0.421, "step": 5948 }, { "epoch": 2.505545416257195, "grad_norm": 0.4311501085758209, "learning_rate": 8.040016539163792e-07, "loss": 0.3723, "step": 5949 }, { "epoch": 2.5059665871121717, "grad_norm": 0.4333311915397644, "learning_rate": 8.026692889339305e-07, "loss": 0.4279, "step": 5950 }, { "epoch": 2.5063877579671487, "grad_norm": 0.4573356509208679, "learning_rate": 8.013379324897513e-07, "loss": 0.4653, "step": 5951 }, { "epoch": 2.5068089288221254, "grad_norm": 0.4701921045780182, "learning_rate": 8.000075849037409e-07, "loss": 0.4578, "step": 5952 }, { "epoch": 2.5072300996771024, "grad_norm": 0.4232734143733978, "learning_rate": 7.986782464955572e-07, "loss": 0.4201, "step": 5953 }, { "epoch": 2.507651270532079, "grad_norm": 0.45695480704307556, "learning_rate": 7.973499175846139e-07, "loss": 0.4354, "step": 5954 }, { "epoch": 2.508072441387056, "grad_norm": 0.4834212064743042, "learning_rate": 7.960225984900844e-07, "loss": 0.4789, "step": 5955 }, { "epoch": 2.5084936122420327, "grad_norm": 0.45171794295310974, "learning_rate": 7.946962895308968e-07, "loss": 0.3997, "step": 5956 }, { "epoch": 2.5089147830970098, "grad_norm": 0.4209800362586975, "learning_rate": 7.933709910257375e-07, "loss": 0.4236, "step": 5957 }, { "epoch": 2.5093359539519864, "grad_norm": 0.4600510597229004, "learning_rate": 7.920467032930523e-07, "loss": 0.4427, "step": 5958 }, { "epoch": 2.5097571248069634, "grad_norm": 0.3895034193992615, "learning_rate": 7.907234266510394e-07, "loss": 0.3243, "step": 5959 }, { "epoch": 2.51017829566194, "grad_norm": 0.4072572588920593, "learning_rate": 7.894011614176583e-07, "loss": 0.3905, "step": 5960 }, { "epoch": 2.510599466516917, "grad_norm": 0.4047645628452301, "learning_rate": 7.880799079106272e-07, "loss": 0.4419, "step": 5961 }, { "epoch": 2.5110206373718937, "grad_norm": 0.38010478019714355, "learning_rate": 7.86759666447412e-07, "loss": 0.3855, "step": 5962 }, { "epoch": 2.511441808226871, "grad_norm": 0.31957921385765076, "learning_rate": 7.854404373452446e-07, "loss": 0.3629, "step": 5963 }, { "epoch": 2.5118629790818474, "grad_norm": 0.44794753193855286, "learning_rate": 7.841222209211119e-07, "loss": 0.4529, "step": 5964 }, { "epoch": 2.5122841499368245, "grad_norm": 0.38499417901039124, "learning_rate": 7.828050174917528e-07, "loss": 0.3413, "step": 5965 }, { "epoch": 2.512705320791801, "grad_norm": 0.4367479979991913, "learning_rate": 7.814888273736698e-07, "loss": 0.442, "step": 5966 }, { "epoch": 2.513126491646778, "grad_norm": 0.45805084705352783, "learning_rate": 7.801736508831159e-07, "loss": 0.3945, "step": 5967 }, { "epoch": 2.5135476625017548, "grad_norm": 0.3607465326786041, "learning_rate": 7.788594883361017e-07, "loss": 0.3844, "step": 5968 }, { "epoch": 2.513968833356732, "grad_norm": 0.3885623514652252, "learning_rate": 7.775463400483984e-07, "loss": 0.4182, "step": 5969 }, { "epoch": 2.5143900042117084, "grad_norm": 0.3703627288341522, "learning_rate": 7.762342063355272e-07, "loss": 0.3351, "step": 5970 }, { "epoch": 2.5148111750666855, "grad_norm": 0.4419272840023041, "learning_rate": 7.749230875127722e-07, "loss": 0.4192, "step": 5971 }, { "epoch": 2.515232345921662, "grad_norm": 0.4377315640449524, "learning_rate": 7.736129838951678e-07, "loss": 0.3903, "step": 5972 }, { "epoch": 2.515653516776639, "grad_norm": 0.45982322096824646, "learning_rate": 7.723038957975066e-07, "loss": 0.4128, "step": 5973 }, { "epoch": 2.516074687631616, "grad_norm": 0.4205114543437958, "learning_rate": 7.70995823534339e-07, "loss": 0.3963, "step": 5974 }, { "epoch": 2.516495858486593, "grad_norm": 0.44960731267929077, "learning_rate": 7.696887674199676e-07, "loss": 0.4492, "step": 5975 }, { "epoch": 2.5169170293415695, "grad_norm": 0.4184452295303345, "learning_rate": 7.683827277684558e-07, "loss": 0.3586, "step": 5976 }, { "epoch": 2.5173382001965465, "grad_norm": 0.37651655077934265, "learning_rate": 7.67077704893619e-07, "loss": 0.3733, "step": 5977 }, { "epoch": 2.517759371051523, "grad_norm": 0.4218204915523529, "learning_rate": 7.657736991090264e-07, "loss": 0.4485, "step": 5978 }, { "epoch": 2.5181805419065, "grad_norm": 0.4165917932987213, "learning_rate": 7.644707107280086e-07, "loss": 0.3633, "step": 5979 }, { "epoch": 2.518601712761477, "grad_norm": 0.4332257807254791, "learning_rate": 7.631687400636495e-07, "loss": 0.4304, "step": 5980 }, { "epoch": 2.519022883616454, "grad_norm": 0.44637197256088257, "learning_rate": 7.618677874287861e-07, "loss": 0.4419, "step": 5981 }, { "epoch": 2.5194440544714305, "grad_norm": 0.37567394971847534, "learning_rate": 7.605678531360111e-07, "loss": 0.3534, "step": 5982 }, { "epoch": 2.5198652253264076, "grad_norm": 0.4378330409526825, "learning_rate": 7.592689374976769e-07, "loss": 0.407, "step": 5983 }, { "epoch": 2.520286396181384, "grad_norm": 0.4246538579463959, "learning_rate": 7.579710408258856e-07, "loss": 0.3922, "step": 5984 }, { "epoch": 2.5207075670363612, "grad_norm": 0.45862266421318054, "learning_rate": 7.566741634324987e-07, "loss": 0.402, "step": 5985 }, { "epoch": 2.521128737891338, "grad_norm": 0.4023420810699463, "learning_rate": 7.553783056291308e-07, "loss": 0.3749, "step": 5986 }, { "epoch": 2.521549908746315, "grad_norm": 0.40796276926994324, "learning_rate": 7.540834677271497e-07, "loss": 0.4127, "step": 5987 }, { "epoch": 2.5219710796012915, "grad_norm": 0.46371990442276, "learning_rate": 7.52789650037683e-07, "loss": 0.4352, "step": 5988 }, { "epoch": 2.5223922504562686, "grad_norm": 0.39357563853263855, "learning_rate": 7.514968528716076e-07, "loss": 0.3946, "step": 5989 }, { "epoch": 2.522813421311245, "grad_norm": 0.4119674265384674, "learning_rate": 7.502050765395608e-07, "loss": 0.4378, "step": 5990 }, { "epoch": 2.5232345921662223, "grad_norm": 0.39544519782066345, "learning_rate": 7.489143213519301e-07, "loss": 0.3766, "step": 5991 }, { "epoch": 2.523655763021199, "grad_norm": 0.3650076985359192, "learning_rate": 7.476245876188581e-07, "loss": 0.3465, "step": 5992 }, { "epoch": 2.524076933876176, "grad_norm": 0.4081384241580963, "learning_rate": 7.463358756502464e-07, "loss": 0.4337, "step": 5993 }, { "epoch": 2.5244981047311525, "grad_norm": 0.3501736521720886, "learning_rate": 7.45048185755744e-07, "loss": 0.3586, "step": 5994 }, { "epoch": 2.5249192755861296, "grad_norm": 0.4201074540615082, "learning_rate": 7.437615182447605e-07, "loss": 0.4119, "step": 5995 }, { "epoch": 2.5253404464411062, "grad_norm": 0.419554203748703, "learning_rate": 7.424758734264592e-07, "loss": 0.4592, "step": 5996 }, { "epoch": 2.525761617296083, "grad_norm": 0.4261375069618225, "learning_rate": 7.411912516097514e-07, "loss": 0.3753, "step": 5997 }, { "epoch": 2.52618278815106, "grad_norm": 0.44637948274612427, "learning_rate": 7.399076531033111e-07, "loss": 0.4548, "step": 5998 }, { "epoch": 2.526603959006037, "grad_norm": 0.4288072884082794, "learning_rate": 7.386250782155596e-07, "loss": 0.4042, "step": 5999 }, { "epoch": 2.5270251298610136, "grad_norm": 0.40613076090812683, "learning_rate": 7.373435272546764e-07, "loss": 0.3736, "step": 6000 }, { "epoch": 2.52744630071599, "grad_norm": 0.403268426656723, "learning_rate": 7.360630005285963e-07, "loss": 0.362, "step": 6001 }, { "epoch": 2.5278674715709673, "grad_norm": 0.412984699010849, "learning_rate": 7.347834983449997e-07, "loss": 0.4555, "step": 6002 }, { "epoch": 2.5282886424259443, "grad_norm": 0.39132317900657654, "learning_rate": 7.335050210113304e-07, "loss": 0.3949, "step": 6003 }, { "epoch": 2.528709813280921, "grad_norm": 0.3670423924922943, "learning_rate": 7.322275688347819e-07, "loss": 0.3568, "step": 6004 }, { "epoch": 2.5291309841358975, "grad_norm": 0.44283947348594666, "learning_rate": 7.309511421223003e-07, "loss": 0.414, "step": 6005 }, { "epoch": 2.5295521549908746, "grad_norm": 0.3952174484729767, "learning_rate": 7.296757411805877e-07, "loss": 0.3777, "step": 6006 }, { "epoch": 2.5299733258458517, "grad_norm": 0.3677303194999695, "learning_rate": 7.284013663160983e-07, "loss": 0.3556, "step": 6007 }, { "epoch": 2.5303944967008283, "grad_norm": 0.40763577818870544, "learning_rate": 7.271280178350382e-07, "loss": 0.3921, "step": 6008 }, { "epoch": 2.530815667555805, "grad_norm": 0.4490340054035187, "learning_rate": 7.258556960433711e-07, "loss": 0.451, "step": 6009 }, { "epoch": 2.531236838410782, "grad_norm": 0.4566035866737366, "learning_rate": 7.245844012468095e-07, "loss": 0.3819, "step": 6010 }, { "epoch": 2.531658009265759, "grad_norm": 0.3915024995803833, "learning_rate": 7.233141337508237e-07, "loss": 0.3631, "step": 6011 }, { "epoch": 2.5320791801207356, "grad_norm": 0.4659445881843567, "learning_rate": 7.220448938606328e-07, "loss": 0.3968, "step": 6012 }, { "epoch": 2.5325003509757122, "grad_norm": 0.44642913341522217, "learning_rate": 7.207766818812107e-07, "loss": 0.455, "step": 6013 }, { "epoch": 2.5329215218306893, "grad_norm": 0.42866411805152893, "learning_rate": 7.195094981172862e-07, "loss": 0.4392, "step": 6014 }, { "epoch": 2.5333426926856664, "grad_norm": 0.4424460232257843, "learning_rate": 7.182433428733371e-07, "loss": 0.3901, "step": 6015 }, { "epoch": 2.533763863540643, "grad_norm": 0.4483625888824463, "learning_rate": 7.169782164535983e-07, "loss": 0.3976, "step": 6016 }, { "epoch": 2.5341850343956196, "grad_norm": 0.40140846371650696, "learning_rate": 7.157141191620548e-07, "loss": 0.3843, "step": 6017 }, { "epoch": 2.5346062052505967, "grad_norm": 0.3918793797492981, "learning_rate": 7.144510513024433e-07, "loss": 0.4447, "step": 6018 }, { "epoch": 2.5350273761055737, "grad_norm": 0.4304005205631256, "learning_rate": 7.131890131782571e-07, "loss": 0.4103, "step": 6019 }, { "epoch": 2.5354485469605503, "grad_norm": 0.4097425937652588, "learning_rate": 7.119280050927407e-07, "loss": 0.4244, "step": 6020 }, { "epoch": 2.535869717815527, "grad_norm": 0.36895623803138733, "learning_rate": 7.106680273488864e-07, "loss": 0.3719, "step": 6021 }, { "epoch": 2.536290888670504, "grad_norm": 0.45905831456184387, "learning_rate": 7.094090802494447e-07, "loss": 0.4697, "step": 6022 }, { "epoch": 2.536712059525481, "grad_norm": 0.35002103447914124, "learning_rate": 7.08151164096918e-07, "loss": 0.3854, "step": 6023 }, { "epoch": 2.5371332303804577, "grad_norm": 0.37713342905044556, "learning_rate": 7.068942791935574e-07, "loss": 0.4408, "step": 6024 }, { "epoch": 2.5375544012354343, "grad_norm": 0.38597753643989563, "learning_rate": 7.056384258413701e-07, "loss": 0.3654, "step": 6025 }, { "epoch": 2.5379755720904114, "grad_norm": 0.41761699318885803, "learning_rate": 7.043836043421126e-07, "loss": 0.3979, "step": 6026 }, { "epoch": 2.5383967429453884, "grad_norm": 0.4709775745868683, "learning_rate": 7.031298149972931e-07, "loss": 0.4601, "step": 6027 }, { "epoch": 2.538817913800365, "grad_norm": 0.4350738823413849, "learning_rate": 7.018770581081757e-07, "loss": 0.4233, "step": 6028 }, { "epoch": 2.5392390846553416, "grad_norm": 0.3712916076183319, "learning_rate": 7.006253339757723e-07, "loss": 0.3839, "step": 6029 }, { "epoch": 2.5396602555103187, "grad_norm": 0.39773234724998474, "learning_rate": 6.993746429008497e-07, "loss": 0.3854, "step": 6030 }, { "epoch": 2.5400814263652958, "grad_norm": 0.3953840136528015, "learning_rate": 6.981249851839239e-07, "loss": 0.4182, "step": 6031 }, { "epoch": 2.5405025972202724, "grad_norm": 0.3832666575908661, "learning_rate": 6.968763611252632e-07, "loss": 0.3958, "step": 6032 }, { "epoch": 2.540923768075249, "grad_norm": 0.37217235565185547, "learning_rate": 6.956287710248899e-07, "loss": 0.372, "step": 6033 }, { "epoch": 2.541344938930226, "grad_norm": 0.4643930494785309, "learning_rate": 6.943822151825735e-07, "loss": 0.458, "step": 6034 }, { "epoch": 2.541766109785203, "grad_norm": 0.4162221848964691, "learning_rate": 6.931366938978401e-07, "loss": 0.378, "step": 6035 }, { "epoch": 2.5421872806401797, "grad_norm": 0.43639039993286133, "learning_rate": 6.918922074699642e-07, "loss": 0.4392, "step": 6036 }, { "epoch": 2.5426084514951564, "grad_norm": 0.3614782691001892, "learning_rate": 6.906487561979696e-07, "loss": 0.3874, "step": 6037 }, { "epoch": 2.5430296223501334, "grad_norm": 0.4018896520137787, "learning_rate": 6.894063403806361e-07, "loss": 0.3702, "step": 6038 }, { "epoch": 2.5434507932051105, "grad_norm": 0.41700783371925354, "learning_rate": 6.88164960316493e-07, "loss": 0.423, "step": 6039 }, { "epoch": 2.543871964060087, "grad_norm": 0.404977411031723, "learning_rate": 6.869246163038196e-07, "loss": 0.4423, "step": 6040 }, { "epoch": 2.5442931349150637, "grad_norm": 0.38665878772735596, "learning_rate": 6.856853086406451e-07, "loss": 0.3778, "step": 6041 }, { "epoch": 2.5447143057700408, "grad_norm": 0.3898293673992157, "learning_rate": 6.844470376247536e-07, "loss": 0.4176, "step": 6042 }, { "epoch": 2.545135476625018, "grad_norm": 0.4019174575805664, "learning_rate": 6.832098035536761e-07, "loss": 0.3895, "step": 6043 }, { "epoch": 2.5455566474799944, "grad_norm": 0.4301033020019531, "learning_rate": 6.819736067246985e-07, "loss": 0.4441, "step": 6044 }, { "epoch": 2.545977818334971, "grad_norm": 0.3690682351589203, "learning_rate": 6.807384474348538e-07, "loss": 0.2973, "step": 6045 }, { "epoch": 2.546398989189948, "grad_norm": 0.4545222222805023, "learning_rate": 6.795043259809264e-07, "loss": 0.4815, "step": 6046 }, { "epoch": 2.5468201600449247, "grad_norm": 0.4265226125717163, "learning_rate": 6.782712426594545e-07, "loss": 0.3632, "step": 6047 }, { "epoch": 2.547241330899902, "grad_norm": 0.4030183255672455, "learning_rate": 6.770391977667212e-07, "loss": 0.4095, "step": 6048 }, { "epoch": 2.5476625017548784, "grad_norm": 0.4330075979232788, "learning_rate": 6.758081915987669e-07, "loss": 0.4584, "step": 6049 }, { "epoch": 2.5480836726098555, "grad_norm": 0.4250657558441162, "learning_rate": 6.745782244513765e-07, "loss": 0.3985, "step": 6050 }, { "epoch": 2.548504843464832, "grad_norm": 0.42636191844940186, "learning_rate": 6.733492966200872e-07, "loss": 0.414, "step": 6051 }, { "epoch": 2.548926014319809, "grad_norm": 0.3958221971988678, "learning_rate": 6.721214084001892e-07, "loss": 0.4277, "step": 6052 }, { "epoch": 2.5493471851747858, "grad_norm": 0.4058200716972351, "learning_rate": 6.708945600867178e-07, "loss": 0.4037, "step": 6053 }, { "epoch": 2.549768356029763, "grad_norm": 0.4637411832809448, "learning_rate": 6.696687519744621e-07, "loss": 0.4258, "step": 6054 }, { "epoch": 2.5501895268847394, "grad_norm": 0.4340195953845978, "learning_rate": 6.684439843579632e-07, "loss": 0.4492, "step": 6055 }, { "epoch": 2.5506106977397165, "grad_norm": 0.47374647855758667, "learning_rate": 6.672202575315045e-07, "loss": 0.4224, "step": 6056 }, { "epoch": 2.551031868594693, "grad_norm": 0.47450390458106995, "learning_rate": 6.659975717891276e-07, "loss": 0.4585, "step": 6057 }, { "epoch": 2.55145303944967, "grad_norm": 0.40603065490722656, "learning_rate": 6.647759274246185e-07, "loss": 0.3585, "step": 6058 }, { "epoch": 2.551874210304647, "grad_norm": 0.3975535035133362, "learning_rate": 6.635553247315158e-07, "loss": 0.3836, "step": 6059 }, { "epoch": 2.552295381159624, "grad_norm": 0.4264683127403259, "learning_rate": 6.623357640031092e-07, "loss": 0.3925, "step": 6060 }, { "epoch": 2.5527165520146005, "grad_norm": 0.4042724668979645, "learning_rate": 6.611172455324317e-07, "loss": 0.3702, "step": 6061 }, { "epoch": 2.5531377228695775, "grad_norm": 0.43310800194740295, "learning_rate": 6.598997696122717e-07, "loss": 0.4123, "step": 6062 }, { "epoch": 2.553558893724554, "grad_norm": 0.36603349447250366, "learning_rate": 6.586833365351664e-07, "loss": 0.3567, "step": 6063 }, { "epoch": 2.553980064579531, "grad_norm": 0.4367540180683136, "learning_rate": 6.574679465933998e-07, "loss": 0.471, "step": 6064 }, { "epoch": 2.554401235434508, "grad_norm": 0.36003080010414124, "learning_rate": 6.562536000790082e-07, "loss": 0.323, "step": 6065 }, { "epoch": 2.554822406289485, "grad_norm": 0.4072248637676239, "learning_rate": 6.550402972837761e-07, "loss": 0.4309, "step": 6066 }, { "epoch": 2.5552435771444615, "grad_norm": 0.4861108362674713, "learning_rate": 6.538280384992346e-07, "loss": 0.4155, "step": 6067 }, { "epoch": 2.5556647479994385, "grad_norm": 0.43433982133865356, "learning_rate": 6.526168240166686e-07, "loss": 0.39, "step": 6068 }, { "epoch": 2.556085918854415, "grad_norm": 0.4224456250667572, "learning_rate": 6.514066541271085e-07, "loss": 0.4331, "step": 6069 }, { "epoch": 2.556507089709392, "grad_norm": 0.42264416813850403, "learning_rate": 6.501975291213364e-07, "loss": 0.402, "step": 6070 }, { "epoch": 2.556928260564369, "grad_norm": 0.39927804470062256, "learning_rate": 6.489894492898807e-07, "loss": 0.3882, "step": 6071 }, { "epoch": 2.557349431419346, "grad_norm": 0.3801254332065582, "learning_rate": 6.477824149230188e-07, "loss": 0.4065, "step": 6072 }, { "epoch": 2.5577706022743225, "grad_norm": 0.4127311110496521, "learning_rate": 6.46576426310781e-07, "loss": 0.4173, "step": 6073 }, { "epoch": 2.5581917731292996, "grad_norm": 0.4470324218273163, "learning_rate": 6.453714837429403e-07, "loss": 0.4132, "step": 6074 }, { "epoch": 2.558612943984276, "grad_norm": 0.44573304057121277, "learning_rate": 6.441675875090237e-07, "loss": 0.4291, "step": 6075 }, { "epoch": 2.5590341148392532, "grad_norm": 0.3895280957221985, "learning_rate": 6.429647378983033e-07, "loss": 0.3963, "step": 6076 }, { "epoch": 2.55945528569423, "grad_norm": 0.39771243929862976, "learning_rate": 6.417629351998001e-07, "loss": 0.4099, "step": 6077 }, { "epoch": 2.559876456549207, "grad_norm": 0.4183800518512726, "learning_rate": 6.405621797022848e-07, "loss": 0.4357, "step": 6078 }, { "epoch": 2.5602976274041835, "grad_norm": 0.4081176817417145, "learning_rate": 6.393624716942775e-07, "loss": 0.45, "step": 6079 }, { "epoch": 2.5607187982591606, "grad_norm": 0.42261481285095215, "learning_rate": 6.381638114640437e-07, "loss": 0.4258, "step": 6080 }, { "epoch": 2.561139969114137, "grad_norm": 0.4146954417228699, "learning_rate": 6.369661992995979e-07, "loss": 0.4035, "step": 6081 }, { "epoch": 2.5615611399691143, "grad_norm": 0.43072420358657837, "learning_rate": 6.35769635488705e-07, "loss": 0.3684, "step": 6082 }, { "epoch": 2.561982310824091, "grad_norm": 0.38493606448173523, "learning_rate": 6.345741203188749e-07, "loss": 0.3416, "step": 6083 }, { "epoch": 2.562403481679068, "grad_norm": 0.4051729142665863, "learning_rate": 6.333796540773685e-07, "loss": 0.3942, "step": 6084 }, { "epoch": 2.5628246525340446, "grad_norm": 0.42747676372528076, "learning_rate": 6.321862370511922e-07, "loss": 0.4184, "step": 6085 }, { "epoch": 2.5632458233890216, "grad_norm": 0.41877540946006775, "learning_rate": 6.309938695271006e-07, "loss": 0.4218, "step": 6086 }, { "epoch": 2.5636669942439982, "grad_norm": 0.45639586448669434, "learning_rate": 6.298025517915985e-07, "loss": 0.4275, "step": 6087 }, { "epoch": 2.5640881650989753, "grad_norm": 0.43046078085899353, "learning_rate": 6.28612284130935e-07, "loss": 0.4109, "step": 6088 }, { "epoch": 2.564509335953952, "grad_norm": 0.4018413722515106, "learning_rate": 6.27423066831111e-07, "loss": 0.4296, "step": 6089 }, { "epoch": 2.564930506808929, "grad_norm": 0.42420855164527893, "learning_rate": 6.262349001778711e-07, "loss": 0.436, "step": 6090 }, { "epoch": 2.5653516776639056, "grad_norm": 0.41710469126701355, "learning_rate": 6.250477844567076e-07, "loss": 0.4011, "step": 6091 }, { "epoch": 2.5657728485188827, "grad_norm": 0.36598384380340576, "learning_rate": 6.238617199528652e-07, "loss": 0.3603, "step": 6092 }, { "epoch": 2.5661940193738593, "grad_norm": 0.4038952887058258, "learning_rate": 6.226767069513289e-07, "loss": 0.4513, "step": 6093 }, { "epoch": 2.5666151902288363, "grad_norm": 0.37414273619651794, "learning_rate": 6.214927457368363e-07, "loss": 0.4204, "step": 6094 }, { "epoch": 2.567036361083813, "grad_norm": 0.36703649163246155, "learning_rate": 6.203098365938731e-07, "loss": 0.341, "step": 6095 }, { "epoch": 2.56745753193879, "grad_norm": 0.42068082094192505, "learning_rate": 6.191279798066646e-07, "loss": 0.4351, "step": 6096 }, { "epoch": 2.5678787027937666, "grad_norm": 0.3821226954460144, "learning_rate": 6.179471756591915e-07, "loss": 0.4064, "step": 6097 }, { "epoch": 2.5682998736487432, "grad_norm": 0.3143140375614166, "learning_rate": 6.16767424435179e-07, "loss": 0.3229, "step": 6098 }, { "epoch": 2.5687210445037203, "grad_norm": 0.43414896726608276, "learning_rate": 6.155887264180971e-07, "loss": 0.4154, "step": 6099 }, { "epoch": 2.5691422153586974, "grad_norm": 0.4234369695186615, "learning_rate": 6.144110818911652e-07, "loss": 0.4428, "step": 6100 }, { "epoch": 2.569563386213674, "grad_norm": 0.44120025634765625, "learning_rate": 6.132344911373494e-07, "loss": 0.4209, "step": 6101 }, { "epoch": 2.5699845570686506, "grad_norm": 0.4102826714515686, "learning_rate": 6.120589544393596e-07, "loss": 0.4158, "step": 6102 }, { "epoch": 2.5704057279236276, "grad_norm": 0.4163912236690521, "learning_rate": 6.108844720796575e-07, "loss": 0.3878, "step": 6103 }, { "epoch": 2.5708268987786047, "grad_norm": 0.39497438073158264, "learning_rate": 6.097110443404469e-07, "loss": 0.3997, "step": 6104 }, { "epoch": 2.5712480696335813, "grad_norm": 0.4011504352092743, "learning_rate": 6.085386715036812e-07, "loss": 0.4027, "step": 6105 }, { "epoch": 2.571669240488558, "grad_norm": 0.36375948786735535, "learning_rate": 6.073673538510588e-07, "loss": 0.3125, "step": 6106 }, { "epoch": 2.572090411343535, "grad_norm": 0.47389790415763855, "learning_rate": 6.061970916640236e-07, "loss": 0.4961, "step": 6107 }, { "epoch": 2.572511582198512, "grad_norm": 0.41829484701156616, "learning_rate": 6.050278852237701e-07, "loss": 0.3944, "step": 6108 }, { "epoch": 2.5729327530534887, "grad_norm": 0.4241446554660797, "learning_rate": 6.03859734811233e-07, "loss": 0.4593, "step": 6109 }, { "epoch": 2.5733539239084653, "grad_norm": 0.3597286343574524, "learning_rate": 6.026926407070988e-07, "loss": 0.3503, "step": 6110 }, { "epoch": 2.5737750947634424, "grad_norm": 0.374674916267395, "learning_rate": 6.015266031917977e-07, "loss": 0.4303, "step": 6111 }, { "epoch": 2.5741962656184194, "grad_norm": 0.41712749004364014, "learning_rate": 6.003616225455045e-07, "loss": 0.3911, "step": 6112 }, { "epoch": 2.574617436473396, "grad_norm": 0.3834887742996216, "learning_rate": 5.991976990481424e-07, "loss": 0.3991, "step": 6113 }, { "epoch": 2.5750386073283726, "grad_norm": 0.40482452511787415, "learning_rate": 5.980348329793817e-07, "loss": 0.3941, "step": 6114 }, { "epoch": 2.5754597781833497, "grad_norm": 0.4151337146759033, "learning_rate": 5.968730246186361e-07, "loss": 0.4093, "step": 6115 }, { "epoch": 2.5758809490383268, "grad_norm": 0.45813173055648804, "learning_rate": 5.95712274245065e-07, "loss": 0.4069, "step": 6116 }, { "epoch": 2.5763021198933034, "grad_norm": 0.4466021955013275, "learning_rate": 5.945525821375736e-07, "loss": 0.3862, "step": 6117 }, { "epoch": 2.57672329074828, "grad_norm": 0.4159059226512909, "learning_rate": 5.933939485748153e-07, "loss": 0.3869, "step": 6118 }, { "epoch": 2.577144461603257, "grad_norm": 0.3906993865966797, "learning_rate": 5.922363738351888e-07, "loss": 0.3994, "step": 6119 }, { "epoch": 2.577565632458234, "grad_norm": 0.4166383445262909, "learning_rate": 5.910798581968352e-07, "loss": 0.4487, "step": 6120 }, { "epoch": 2.5779868033132107, "grad_norm": 0.43330082297325134, "learning_rate": 5.899244019376427e-07, "loss": 0.3954, "step": 6121 }, { "epoch": 2.5784079741681873, "grad_norm": 0.38899219036102295, "learning_rate": 5.887700053352474e-07, "loss": 0.3841, "step": 6122 }, { "epoch": 2.5788291450231644, "grad_norm": 0.4399867653846741, "learning_rate": 5.876166686670265e-07, "loss": 0.4832, "step": 6123 }, { "epoch": 2.5792503158781415, "grad_norm": 0.4273865818977356, "learning_rate": 5.864643922101065e-07, "loss": 0.3906, "step": 6124 }, { "epoch": 2.579671486733118, "grad_norm": 0.3650389015674591, "learning_rate": 5.853131762413572e-07, "loss": 0.365, "step": 6125 }, { "epoch": 2.5800926575880947, "grad_norm": 0.3892350494861603, "learning_rate": 5.841630210373916e-07, "loss": 0.4039, "step": 6126 }, { "epoch": 2.5805138284430718, "grad_norm": 0.4721076190471649, "learning_rate": 5.830139268745732e-07, "loss": 0.4535, "step": 6127 }, { "epoch": 2.580934999298049, "grad_norm": 0.3974190056324005, "learning_rate": 5.818658940290045e-07, "loss": 0.3726, "step": 6128 }, { "epoch": 2.5813561701530254, "grad_norm": 0.40860414505004883, "learning_rate": 5.80718922776538e-07, "loss": 0.4161, "step": 6129 }, { "epoch": 2.581777341008002, "grad_norm": 0.40356388688087463, "learning_rate": 5.79573013392768e-07, "loss": 0.3708, "step": 6130 }, { "epoch": 2.582198511862979, "grad_norm": 0.411231130361557, "learning_rate": 5.784281661530339e-07, "loss": 0.3875, "step": 6131 }, { "epoch": 2.582619682717956, "grad_norm": 0.4598008096218109, "learning_rate": 5.772843813324225e-07, "loss": 0.445, "step": 6132 }, { "epoch": 2.583040853572933, "grad_norm": 0.42818060517311096, "learning_rate": 5.761416592057606e-07, "loss": 0.367, "step": 6133 }, { "epoch": 2.5834620244279094, "grad_norm": 0.42662864923477173, "learning_rate": 5.750000000476258e-07, "loss": 0.4315, "step": 6134 }, { "epoch": 2.5838831952828865, "grad_norm": 0.3730015754699707, "learning_rate": 5.738594041323348e-07, "loss": 0.4024, "step": 6135 }, { "epoch": 2.5843043661378635, "grad_norm": 0.41428980231285095, "learning_rate": 5.727198717339511e-07, "loss": 0.4331, "step": 6136 }, { "epoch": 2.58472553699284, "grad_norm": 0.6119223237037659, "learning_rate": 5.715814031262823e-07, "loss": 0.4012, "step": 6137 }, { "epoch": 2.5851467078478167, "grad_norm": 0.41054555773735046, "learning_rate": 5.704439985828825e-07, "loss": 0.3825, "step": 6138 }, { "epoch": 2.585567878702794, "grad_norm": 0.41352376341819763, "learning_rate": 5.693076583770468e-07, "loss": 0.4406, "step": 6139 }, { "epoch": 2.585989049557771, "grad_norm": 0.3584120571613312, "learning_rate": 5.681723827818153e-07, "loss": 0.358, "step": 6140 }, { "epoch": 2.5864102204127475, "grad_norm": 0.4009607434272766, "learning_rate": 5.670381720699747e-07, "loss": 0.4101, "step": 6141 }, { "epoch": 2.586831391267724, "grad_norm": 0.40182560682296753, "learning_rate": 5.659050265140525e-07, "loss": 0.3867, "step": 6142 }, { "epoch": 2.587252562122701, "grad_norm": 0.44969260692596436, "learning_rate": 5.647729463863232e-07, "loss": 0.4148, "step": 6143 }, { "epoch": 2.587673732977678, "grad_norm": 0.4477101266384125, "learning_rate": 5.636419319588044e-07, "loss": 0.4475, "step": 6144 }, { "epoch": 2.588094903832655, "grad_norm": 0.36701297760009766, "learning_rate": 5.62511983503255e-07, "loss": 0.3509, "step": 6145 }, { "epoch": 2.5885160746876315, "grad_norm": 0.42771413922309875, "learning_rate": 5.613831012911819e-07, "loss": 0.4216, "step": 6146 }, { "epoch": 2.5889372455426085, "grad_norm": 0.43857434391975403, "learning_rate": 5.602552855938326e-07, "loss": 0.3942, "step": 6147 }, { "epoch": 2.589358416397585, "grad_norm": 0.43928059935569763, "learning_rate": 5.591285366822019e-07, "loss": 0.4413, "step": 6148 }, { "epoch": 2.589779587252562, "grad_norm": 0.3965202867984772, "learning_rate": 5.580028548270239e-07, "loss": 0.3917, "step": 6149 }, { "epoch": 2.590200758107539, "grad_norm": 0.38909226655960083, "learning_rate": 5.568782402987782e-07, "loss": 0.3937, "step": 6150 }, { "epoch": 2.590621928962516, "grad_norm": 0.3860815465450287, "learning_rate": 5.557546933676899e-07, "loss": 0.4207, "step": 6151 }, { "epoch": 2.5910430998174925, "grad_norm": 0.4248045086860657, "learning_rate": 5.546322143037236e-07, "loss": 0.4348, "step": 6152 }, { "epoch": 2.5914642706724695, "grad_norm": 0.36670106649398804, "learning_rate": 5.535108033765913e-07, "loss": 0.3912, "step": 6153 }, { "epoch": 2.591885441527446, "grad_norm": 0.4024182856082916, "learning_rate": 5.523904608557479e-07, "loss": 0.3728, "step": 6154 }, { "epoch": 2.592306612382423, "grad_norm": 0.3855992257595062, "learning_rate": 5.512711870103865e-07, "loss": 0.429, "step": 6155 }, { "epoch": 2.5927277832374, "grad_norm": 0.40979716181755066, "learning_rate": 5.501529821094492e-07, "loss": 0.4069, "step": 6156 }, { "epoch": 2.593148954092377, "grad_norm": 0.4092758595943451, "learning_rate": 5.4903584642162e-07, "loss": 0.3984, "step": 6157 }, { "epoch": 2.5935701249473535, "grad_norm": 0.39397817850112915, "learning_rate": 5.479197802153241e-07, "loss": 0.4218, "step": 6158 }, { "epoch": 2.5939912958023306, "grad_norm": 0.3795793652534485, "learning_rate": 5.468047837587314e-07, "loss": 0.3548, "step": 6159 }, { "epoch": 2.594412466657307, "grad_norm": 0.3985418975353241, "learning_rate": 5.456908573197545e-07, "loss": 0.3924, "step": 6160 }, { "epoch": 2.5948336375122842, "grad_norm": 0.42468929290771484, "learning_rate": 5.445780011660473e-07, "loss": 0.486, "step": 6161 }, { "epoch": 2.595254808367261, "grad_norm": 0.39986875653266907, "learning_rate": 5.434662155650094e-07, "loss": 0.3855, "step": 6162 }, { "epoch": 2.595675979222238, "grad_norm": 0.36314696073532104, "learning_rate": 5.423555007837799e-07, "loss": 0.3838, "step": 6163 }, { "epoch": 2.5960971500772145, "grad_norm": 0.4039018750190735, "learning_rate": 5.412458570892448e-07, "loss": 0.4461, "step": 6164 }, { "epoch": 2.5965183209321916, "grad_norm": 0.39274361729621887, "learning_rate": 5.401372847480285e-07, "loss": 0.4354, "step": 6165 }, { "epoch": 2.596939491787168, "grad_norm": 0.37251707911491394, "learning_rate": 5.390297840264986e-07, "loss": 0.3289, "step": 6166 }, { "epoch": 2.5973606626421453, "grad_norm": 0.38928869366645813, "learning_rate": 5.379233551907692e-07, "loss": 0.4188, "step": 6167 }, { "epoch": 2.597781833497122, "grad_norm": 0.4417869746685028, "learning_rate": 5.368179985066912e-07, "loss": 0.4705, "step": 6168 }, { "epoch": 2.598203004352099, "grad_norm": 0.39634668827056885, "learning_rate": 5.357137142398633e-07, "loss": 0.4088, "step": 6169 }, { "epoch": 2.5986241752070756, "grad_norm": 0.36891311407089233, "learning_rate": 5.346105026556226e-07, "loss": 0.3503, "step": 6170 }, { "epoch": 2.5990453460620526, "grad_norm": 0.45923301577568054, "learning_rate": 5.335083640190481e-07, "loss": 0.4378, "step": 6171 }, { "epoch": 2.5994665169170292, "grad_norm": 0.4546692669391632, "learning_rate": 5.324072985949646e-07, "loss": 0.3923, "step": 6172 }, { "epoch": 2.5998876877720063, "grad_norm": 0.44184666872024536, "learning_rate": 5.31307306647938e-07, "loss": 0.414, "step": 6173 }, { "epoch": 2.600308858626983, "grad_norm": 0.4259422719478607, "learning_rate": 5.302083884422738e-07, "loss": 0.4611, "step": 6174 }, { "epoch": 2.60073002948196, "grad_norm": 0.37568041682243347, "learning_rate": 5.291105442420208e-07, "loss": 0.3407, "step": 6175 }, { "epoch": 2.6011512003369366, "grad_norm": 0.44902393221855164, "learning_rate": 5.280137743109698e-07, "loss": 0.4452, "step": 6176 }, { "epoch": 2.6015723711919136, "grad_norm": 0.3628503084182739, "learning_rate": 5.269180789126543e-07, "loss": 0.3192, "step": 6177 }, { "epoch": 2.6019935420468903, "grad_norm": 0.45234015583992004, "learning_rate": 5.258234583103495e-07, "loss": 0.4844, "step": 6178 }, { "epoch": 2.6024147129018673, "grad_norm": 0.41866961121559143, "learning_rate": 5.247299127670714e-07, "loss": 0.4037, "step": 6179 }, { "epoch": 2.602835883756844, "grad_norm": 0.4312553107738495, "learning_rate": 5.23637442545577e-07, "loss": 0.3907, "step": 6180 }, { "epoch": 2.603257054611821, "grad_norm": 0.4495203197002411, "learning_rate": 5.225460479083672e-07, "loss": 0.4624, "step": 6181 }, { "epoch": 2.6036782254667976, "grad_norm": 0.40410315990448, "learning_rate": 5.214557291176824e-07, "loss": 0.3549, "step": 6182 }, { "epoch": 2.6040993963217747, "grad_norm": 0.4221743047237396, "learning_rate": 5.203664864355068e-07, "loss": 0.4474, "step": 6183 }, { "epoch": 2.6045205671767513, "grad_norm": 0.4090854525566101, "learning_rate": 5.192783201235635e-07, "loss": 0.3477, "step": 6184 }, { "epoch": 2.6049417380317283, "grad_norm": 0.4819706380367279, "learning_rate": 5.181912304433167e-07, "loss": 0.472, "step": 6185 }, { "epoch": 2.605362908886705, "grad_norm": 0.384501188993454, "learning_rate": 5.17105217655976e-07, "loss": 0.3382, "step": 6186 }, { "epoch": 2.605784079741682, "grad_norm": 0.48426494002342224, "learning_rate": 5.160202820224875e-07, "loss": 0.4335, "step": 6187 }, { "epoch": 2.6062052505966586, "grad_norm": 0.4321582317352295, "learning_rate": 5.149364238035409e-07, "loss": 0.4228, "step": 6188 }, { "epoch": 2.6066264214516357, "grad_norm": 0.35969212651252747, "learning_rate": 5.138536432595686e-07, "loss": 0.347, "step": 6189 }, { "epoch": 2.6070475923066123, "grad_norm": 0.43323445320129395, "learning_rate": 5.127719406507386e-07, "loss": 0.4301, "step": 6190 }, { "epoch": 2.6074687631615894, "grad_norm": 0.4045359790325165, "learning_rate": 5.116913162369663e-07, "loss": 0.3794, "step": 6191 }, { "epoch": 2.607889934016566, "grad_norm": 0.3636764585971832, "learning_rate": 5.106117702779024e-07, "loss": 0.3962, "step": 6192 }, { "epoch": 2.608311104871543, "grad_norm": 0.4039274752140045, "learning_rate": 5.09533303032943e-07, "loss": 0.4352, "step": 6193 }, { "epoch": 2.6087322757265197, "grad_norm": 0.41619017720222473, "learning_rate": 5.084559147612244e-07, "loss": 0.4084, "step": 6194 }, { "epoch": 2.6091534465814967, "grad_norm": 0.41430869698524475, "learning_rate": 5.073796057216185e-07, "loss": 0.396, "step": 6195 }, { "epoch": 2.6095746174364733, "grad_norm": 0.47939884662628174, "learning_rate": 5.063043761727443e-07, "loss": 0.4304, "step": 6196 }, { "epoch": 2.6099957882914504, "grad_norm": 0.39607739448547363, "learning_rate": 5.052302263729592e-07, "loss": 0.3868, "step": 6197 }, { "epoch": 2.610416959146427, "grad_norm": 0.4331662654876709, "learning_rate": 5.041571565803599e-07, "loss": 0.4373, "step": 6198 }, { "epoch": 2.6108381300014036, "grad_norm": 0.3668025732040405, "learning_rate": 5.030851670527853e-07, "loss": 0.3617, "step": 6199 }, { "epoch": 2.6112593008563807, "grad_norm": 0.4274585545063019, "learning_rate": 5.020142580478138e-07, "loss": 0.4137, "step": 6200 }, { "epoch": 2.6116804717113578, "grad_norm": 0.4076821804046631, "learning_rate": 5.009444298227628e-07, "loss": 0.3623, "step": 6201 }, { "epoch": 2.6121016425663344, "grad_norm": 0.40210556983947754, "learning_rate": 4.99875682634694e-07, "loss": 0.4207, "step": 6202 }, { "epoch": 2.612522813421311, "grad_norm": 0.40166300535202026, "learning_rate": 4.988080167404047e-07, "loss": 0.3631, "step": 6203 }, { "epoch": 2.612943984276288, "grad_norm": 0.420672744512558, "learning_rate": 4.977414323964364e-07, "loss": 0.3796, "step": 6204 }, { "epoch": 2.613365155131265, "grad_norm": 0.3872200548648834, "learning_rate": 4.966759298590684e-07, "loss": 0.3935, "step": 6205 }, { "epoch": 2.6137863259862417, "grad_norm": 0.42747607827186584, "learning_rate": 4.956115093843189e-07, "loss": 0.4331, "step": 6206 }, { "epoch": 2.6142074968412183, "grad_norm": 0.38655635714530945, "learning_rate": 4.945481712279504e-07, "loss": 0.3559, "step": 6207 }, { "epoch": 2.6146286676961954, "grad_norm": 0.4658649265766144, "learning_rate": 4.934859156454608e-07, "loss": 0.4586, "step": 6208 }, { "epoch": 2.6150498385511725, "grad_norm": 0.40527090430259705, "learning_rate": 4.924247428920914e-07, "loss": 0.4027, "step": 6209 }, { "epoch": 2.615471009406149, "grad_norm": 0.3897411525249481, "learning_rate": 4.913646532228206e-07, "loss": 0.4086, "step": 6210 }, { "epoch": 2.6158921802611257, "grad_norm": 0.3885400891304016, "learning_rate": 4.903056468923672e-07, "loss": 0.3716, "step": 6211 }, { "epoch": 2.6163133511161027, "grad_norm": 0.47043371200561523, "learning_rate": 4.892477241551902e-07, "loss": 0.441, "step": 6212 }, { "epoch": 2.61673452197108, "grad_norm": 0.36010849475860596, "learning_rate": 4.881908852654904e-07, "loss": 0.3719, "step": 6213 }, { "epoch": 2.6171556928260564, "grad_norm": 0.4390113353729248, "learning_rate": 4.871351304772043e-07, "loss": 0.4641, "step": 6214 }, { "epoch": 2.617576863681033, "grad_norm": 0.42943769693374634, "learning_rate": 4.860804600440083e-07, "loss": 0.4356, "step": 6215 }, { "epoch": 2.61799803453601, "grad_norm": 0.3920472264289856, "learning_rate": 4.850268742193225e-07, "loss": 0.3719, "step": 6216 }, { "epoch": 2.618419205390987, "grad_norm": 0.41193506121635437, "learning_rate": 4.839743732563007e-07, "loss": 0.3858, "step": 6217 }, { "epoch": 2.6188403762459638, "grad_norm": 0.42058616876602173, "learning_rate": 4.829229574078403e-07, "loss": 0.452, "step": 6218 }, { "epoch": 2.6192615471009404, "grad_norm": 0.42920222878456116, "learning_rate": 4.818726269265767e-07, "loss": 0.4022, "step": 6219 }, { "epoch": 2.6196827179559175, "grad_norm": 0.40685945749282837, "learning_rate": 4.808233820648817e-07, "loss": 0.4338, "step": 6220 }, { "epoch": 2.6201038888108945, "grad_norm": 0.4058244228363037, "learning_rate": 4.797752230748721e-07, "loss": 0.4483, "step": 6221 }, { "epoch": 2.620525059665871, "grad_norm": 0.4373600482940674, "learning_rate": 4.78728150208398e-07, "loss": 0.4406, "step": 6222 }, { "epoch": 2.6209462305208477, "grad_norm": 0.4144936203956604, "learning_rate": 4.776821637170525e-07, "loss": 0.4091, "step": 6223 }, { "epoch": 2.621367401375825, "grad_norm": 0.396290123462677, "learning_rate": 4.76637263852166e-07, "loss": 0.3728, "step": 6224 }, { "epoch": 2.621788572230802, "grad_norm": 0.41284793615341187, "learning_rate": 4.7559345086480577e-07, "loss": 0.4539, "step": 6225 }, { "epoch": 2.6222097430857785, "grad_norm": 0.4308437705039978, "learning_rate": 4.745507250057835e-07, "loss": 0.4604, "step": 6226 }, { "epoch": 2.622630913940755, "grad_norm": 0.39466992020606995, "learning_rate": 4.735090865256431e-07, "loss": 0.3553, "step": 6227 }, { "epoch": 2.623052084795732, "grad_norm": 0.42640385031700134, "learning_rate": 4.724685356746722e-07, "loss": 0.3957, "step": 6228 }, { "epoch": 2.623473255650709, "grad_norm": 0.4201566278934479, "learning_rate": 4.7142907270289695e-07, "loss": 0.4189, "step": 6229 }, { "epoch": 2.623894426505686, "grad_norm": 0.3969421982765198, "learning_rate": 4.703906978600764e-07, "loss": 0.4055, "step": 6230 }, { "epoch": 2.6243155973606624, "grad_norm": 0.3847027122974396, "learning_rate": 4.6935341139571377e-07, "loss": 0.3971, "step": 6231 }, { "epoch": 2.6247367682156395, "grad_norm": 0.3699474334716797, "learning_rate": 4.683172135590508e-07, "loss": 0.3817, "step": 6232 }, { "epoch": 2.6251579390706166, "grad_norm": 0.3778850734233856, "learning_rate": 4.672821045990644e-07, "loss": 0.4167, "step": 6233 }, { "epoch": 2.625579109925593, "grad_norm": 0.4160134792327881, "learning_rate": 4.6624808476447235e-07, "loss": 0.4192, "step": 6234 }, { "epoch": 2.62600028078057, "grad_norm": 0.4281671941280365, "learning_rate": 4.6521515430372757e-07, "loss": 0.4683, "step": 6235 }, { "epoch": 2.626421451635547, "grad_norm": 0.3792080879211426, "learning_rate": 4.6418331346502587e-07, "loss": 0.3877, "step": 6236 }, { "epoch": 2.626842622490524, "grad_norm": 0.3510574400424957, "learning_rate": 4.6315256249629846e-07, "loss": 0.3529, "step": 6237 }, { "epoch": 2.6272637933455005, "grad_norm": 0.4660769999027252, "learning_rate": 4.6212290164521554e-07, "loss": 0.4274, "step": 6238 }, { "epoch": 2.627684964200477, "grad_norm": 0.40356719493865967, "learning_rate": 4.610943311591826e-07, "loss": 0.4054, "step": 6239 }, { "epoch": 2.628106135055454, "grad_norm": 0.395894318819046, "learning_rate": 4.6006685128534843e-07, "loss": 0.3884, "step": 6240 }, { "epoch": 2.6285273059104313, "grad_norm": 0.4118223190307617, "learning_rate": 4.590404622705941e-07, "loss": 0.3826, "step": 6241 }, { "epoch": 2.628948476765408, "grad_norm": 0.4396027624607086, "learning_rate": 4.5801516436154326e-07, "loss": 0.4248, "step": 6242 }, { "epoch": 2.6293696476203845, "grad_norm": 0.4133011996746063, "learning_rate": 4.5699095780455503e-07, "loss": 0.4059, "step": 6243 }, { "epoch": 2.6297908184753616, "grad_norm": 0.47010576725006104, "learning_rate": 4.5596784284572537e-07, "loss": 0.4403, "step": 6244 }, { "epoch": 2.6302119893303386, "grad_norm": 0.43694064021110535, "learning_rate": 4.549458197308909e-07, "loss": 0.3997, "step": 6245 }, { "epoch": 2.6306331601853152, "grad_norm": 0.38310515880584717, "learning_rate": 4.5392488870562246e-07, "loss": 0.4101, "step": 6246 }, { "epoch": 2.631054331040292, "grad_norm": 0.3736867606639862, "learning_rate": 4.5290505001523155e-07, "loss": 0.379, "step": 6247 }, { "epoch": 2.631475501895269, "grad_norm": 0.45259806513786316, "learning_rate": 4.518863039047672e-07, "loss": 0.4045, "step": 6248 }, { "epoch": 2.6318966727502455, "grad_norm": 0.39883264899253845, "learning_rate": 4.5086865061901076e-07, "loss": 0.3824, "step": 6249 }, { "epoch": 2.6323178436052226, "grad_norm": 0.40787240862846375, "learning_rate": 4.4985209040248833e-07, "loss": 0.3856, "step": 6250 }, { "epoch": 2.632739014460199, "grad_norm": 0.4058144688606262, "learning_rate": 4.4883662349945787e-07, "loss": 0.4001, "step": 6251 }, { "epoch": 2.6331601853151763, "grad_norm": 0.42798230051994324, "learning_rate": 4.4782225015391754e-07, "loss": 0.4191, "step": 6252 }, { "epoch": 2.633581356170153, "grad_norm": 0.4450957477092743, "learning_rate": 4.468089706096035e-07, "loss": 0.44, "step": 6253 }, { "epoch": 2.63400252702513, "grad_norm": 0.4414149820804596, "learning_rate": 4.457967851099831e-07, "loss": 0.4151, "step": 6254 }, { "epoch": 2.6344236978801066, "grad_norm": 0.44037941098213196, "learning_rate": 4.4478569389826864e-07, "loss": 0.3864, "step": 6255 }, { "epoch": 2.6348448687350836, "grad_norm": 0.3973613381385803, "learning_rate": 4.4377569721740565e-07, "loss": 0.3711, "step": 6256 }, { "epoch": 2.6352660395900602, "grad_norm": 0.43302902579307556, "learning_rate": 4.4276679531007505e-07, "loss": 0.4081, "step": 6257 }, { "epoch": 2.6356872104450373, "grad_norm": 0.4211116135120392, "learning_rate": 4.417589884186996e-07, "loss": 0.4245, "step": 6258 }, { "epoch": 2.636108381300014, "grad_norm": 0.43567508459091187, "learning_rate": 4.407522767854344e-07, "loss": 0.4282, "step": 6259 }, { "epoch": 2.636529552154991, "grad_norm": 0.358547180891037, "learning_rate": 4.3974666065217165e-07, "loss": 0.3798, "step": 6260 }, { "epoch": 2.6369507230099676, "grad_norm": 0.37117043137550354, "learning_rate": 4.3874214026054464e-07, "loss": 0.3424, "step": 6261 }, { "epoch": 2.6373718938649446, "grad_norm": 0.4190497398376465, "learning_rate": 4.377387158519175e-07, "loss": 0.426, "step": 6262 }, { "epoch": 2.6377930647199213, "grad_norm": 0.3289560377597809, "learning_rate": 4.367363876673958e-07, "loss": 0.3682, "step": 6263 }, { "epoch": 2.6382142355748983, "grad_norm": 0.39988890290260315, "learning_rate": 4.3573515594782014e-07, "loss": 0.4018, "step": 6264 }, { "epoch": 2.638635406429875, "grad_norm": 0.41628751158714294, "learning_rate": 4.3473502093376475e-07, "loss": 0.4245, "step": 6265 }, { "epoch": 2.639056577284852, "grad_norm": 0.42073196172714233, "learning_rate": 4.337359828655463e-07, "loss": 0.3809, "step": 6266 }, { "epoch": 2.6394777481398286, "grad_norm": 0.4507080316543579, "learning_rate": 4.327380419832117e-07, "loss": 0.4033, "step": 6267 }, { "epoch": 2.6398989189948057, "grad_norm": 0.4877113103866577, "learning_rate": 4.3174119852654906e-07, "loss": 0.4367, "step": 6268 }, { "epoch": 2.6403200898497823, "grad_norm": 0.43535423278808594, "learning_rate": 4.3074545273507963e-07, "loss": 0.4047, "step": 6269 }, { "epoch": 2.6407412607047593, "grad_norm": 0.45562744140625, "learning_rate": 4.29750804848062e-07, "loss": 0.4544, "step": 6270 }, { "epoch": 2.641162431559736, "grad_norm": 0.36625155806541443, "learning_rate": 4.287572551044916e-07, "loss": 0.3725, "step": 6271 }, { "epoch": 2.641583602414713, "grad_norm": 0.4085838794708252, "learning_rate": 4.277648037430998e-07, "loss": 0.4038, "step": 6272 }, { "epoch": 2.6420047732696896, "grad_norm": 0.416746586561203, "learning_rate": 4.2677345100235356e-07, "loss": 0.38, "step": 6273 }, { "epoch": 2.6424259441246667, "grad_norm": 0.461753249168396, "learning_rate": 4.257831971204546e-07, "loss": 0.4488, "step": 6274 }, { "epoch": 2.6428471149796433, "grad_norm": 0.43602854013442993, "learning_rate": 4.247940423353436e-07, "loss": 0.4614, "step": 6275 }, { "epoch": 2.6432682858346204, "grad_norm": 0.42189469933509827, "learning_rate": 4.2380598688469386e-07, "loss": 0.425, "step": 6276 }, { "epoch": 2.643689456689597, "grad_norm": 0.4105921983718872, "learning_rate": 4.228190310059183e-07, "loss": 0.4119, "step": 6277 }, { "epoch": 2.644110627544574, "grad_norm": 0.4031617343425751, "learning_rate": 4.218331749361626e-07, "loss": 0.399, "step": 6278 }, { "epoch": 2.6445317983995507, "grad_norm": 0.4279930293560028, "learning_rate": 4.20848418912308e-07, "loss": 0.4204, "step": 6279 }, { "epoch": 2.6449529692545277, "grad_norm": 0.3795246481895447, "learning_rate": 4.198647631709746e-07, "loss": 0.3623, "step": 6280 }, { "epoch": 2.6453741401095043, "grad_norm": 0.45216262340545654, "learning_rate": 4.1888220794851386e-07, "loss": 0.4458, "step": 6281 }, { "epoch": 2.6457953109644814, "grad_norm": 0.3939811885356903, "learning_rate": 4.17900753481017e-07, "loss": 0.4222, "step": 6282 }, { "epoch": 2.646216481819458, "grad_norm": 0.42087844014167786, "learning_rate": 4.1692040000430756e-07, "loss": 0.4229, "step": 6283 }, { "epoch": 2.646637652674435, "grad_norm": 0.3644181787967682, "learning_rate": 4.1594114775394544e-07, "loss": 0.4028, "step": 6284 }, { "epoch": 2.6470588235294117, "grad_norm": 0.4226877987384796, "learning_rate": 4.14962996965228e-07, "loss": 0.4495, "step": 6285 }, { "epoch": 2.6474799943843887, "grad_norm": 0.4652199447154999, "learning_rate": 4.139859478731839e-07, "loss": 0.4433, "step": 6286 }, { "epoch": 2.6479011652393654, "grad_norm": 0.4087684154510498, "learning_rate": 4.1301000071258035e-07, "loss": 0.4252, "step": 6287 }, { "epoch": 2.6483223360943424, "grad_norm": 0.4318348467350006, "learning_rate": 4.120351557179203e-07, "loss": 0.4324, "step": 6288 }, { "epoch": 2.648743506949319, "grad_norm": 0.3961198627948761, "learning_rate": 4.110614131234375e-07, "loss": 0.3287, "step": 6289 }, { "epoch": 2.649164677804296, "grad_norm": 0.4038732349872589, "learning_rate": 4.1008877316310534e-07, "loss": 0.3937, "step": 6290 }, { "epoch": 2.6495858486592727, "grad_norm": 0.4417679011821747, "learning_rate": 4.0911723607063083e-07, "loss": 0.4303, "step": 6291 }, { "epoch": 2.6500070195142498, "grad_norm": 0.3746228516101837, "learning_rate": 4.0814680207945434e-07, "loss": 0.3868, "step": 6292 }, { "epoch": 2.6504281903692264, "grad_norm": 0.38542839884757996, "learning_rate": 4.07177471422755e-07, "loss": 0.4129, "step": 6293 }, { "epoch": 2.6508493612242034, "grad_norm": 0.39477065205574036, "learning_rate": 4.0620924433344145e-07, "loss": 0.3725, "step": 6294 }, { "epoch": 2.65127053207918, "grad_norm": 0.40144219994544983, "learning_rate": 4.052421210441615e-07, "loss": 0.3518, "step": 6295 }, { "epoch": 2.6516917029341567, "grad_norm": 0.4316757619380951, "learning_rate": 4.042761017872976e-07, "loss": 0.4087, "step": 6296 }, { "epoch": 2.6521128737891337, "grad_norm": 0.41600334644317627, "learning_rate": 4.033111867949635e-07, "loss": 0.3929, "step": 6297 }, { "epoch": 2.652534044644111, "grad_norm": 0.42918598651885986, "learning_rate": 4.023473762990121e-07, "loss": 0.4153, "step": 6298 }, { "epoch": 2.6529552154990874, "grad_norm": 0.44984740018844604, "learning_rate": 4.0138467053102704e-07, "loss": 0.4244, "step": 6299 }, { "epoch": 2.653376386354064, "grad_norm": 0.43903762102127075, "learning_rate": 4.0042306972232767e-07, "loss": 0.405, "step": 6300 }, { "epoch": 2.653797557209041, "grad_norm": 0.43175452947616577, "learning_rate": 3.994625741039698e-07, "loss": 0.4182, "step": 6301 }, { "epoch": 2.654218728064018, "grad_norm": 0.3844256103038788, "learning_rate": 3.985031839067405e-07, "loss": 0.3808, "step": 6302 }, { "epoch": 2.6546398989189948, "grad_norm": 0.4023074209690094, "learning_rate": 3.9754489936116525e-07, "loss": 0.3799, "step": 6303 }, { "epoch": 2.6550610697739714, "grad_norm": 0.3945085406303406, "learning_rate": 3.965877206974994e-07, "loss": 0.4092, "step": 6304 }, { "epoch": 2.6554822406289484, "grad_norm": 0.425666868686676, "learning_rate": 3.956316481457345e-07, "loss": 0.3956, "step": 6305 }, { "epoch": 2.6559034114839255, "grad_norm": 0.40900784730911255, "learning_rate": 3.946766819355985e-07, "loss": 0.404, "step": 6306 }, { "epoch": 2.656324582338902, "grad_norm": 0.3878198564052582, "learning_rate": 3.937228222965489e-07, "loss": 0.3583, "step": 6307 }, { "epoch": 2.6567457531938787, "grad_norm": 0.40606895089149475, "learning_rate": 3.9277006945778295e-07, "loss": 0.4007, "step": 6308 }, { "epoch": 2.657166924048856, "grad_norm": 0.3946262300014496, "learning_rate": 3.91818423648227e-07, "loss": 0.3948, "step": 6309 }, { "epoch": 2.657588094903833, "grad_norm": 0.42374932765960693, "learning_rate": 3.908678850965425e-07, "loss": 0.4158, "step": 6310 }, { "epoch": 2.6580092657588095, "grad_norm": 0.4070732593536377, "learning_rate": 3.899184540311268e-07, "loss": 0.4389, "step": 6311 }, { "epoch": 2.658430436613786, "grad_norm": 0.3502812385559082, "learning_rate": 3.8897013068011136e-07, "loss": 0.3319, "step": 6312 }, { "epoch": 2.658851607468763, "grad_norm": 0.4450004994869232, "learning_rate": 3.880229152713588e-07, "loss": 0.4483, "step": 6313 }, { "epoch": 2.65927277832374, "grad_norm": 0.3923732042312622, "learning_rate": 3.870768080324655e-07, "loss": 0.4198, "step": 6314 }, { "epoch": 2.659693949178717, "grad_norm": 0.41322579979896545, "learning_rate": 3.8613180919076566e-07, "loss": 0.3555, "step": 6315 }, { "epoch": 2.6601151200336934, "grad_norm": 0.41041043400764465, "learning_rate": 3.851879189733221e-07, "loss": 0.4101, "step": 6316 }, { "epoch": 2.6605362908886705, "grad_norm": 0.3952275216579437, "learning_rate": 3.8424513760693506e-07, "loss": 0.4329, "step": 6317 }, { "epoch": 2.6609574617436476, "grad_norm": 0.43707481026649475, "learning_rate": 3.8330346531813665e-07, "loss": 0.4125, "step": 6318 }, { "epoch": 2.661378632598624, "grad_norm": 0.4272534251213074, "learning_rate": 3.823629023331915e-07, "loss": 0.4502, "step": 6319 }, { "epoch": 2.661799803453601, "grad_norm": 0.4249127209186554, "learning_rate": 3.81423448878101e-07, "loss": 0.4078, "step": 6320 }, { "epoch": 2.662220974308578, "grad_norm": 0.39386919140815735, "learning_rate": 3.804851051785957e-07, "loss": 0.4134, "step": 6321 }, { "epoch": 2.662642145163555, "grad_norm": 0.4174618422985077, "learning_rate": 3.7954787146014307e-07, "loss": 0.4241, "step": 6322 }, { "epoch": 2.6630633160185315, "grad_norm": 0.42963719367980957, "learning_rate": 3.786117479479423e-07, "loss": 0.4273, "step": 6323 }, { "epoch": 2.663484486873508, "grad_norm": 0.40621593594551086, "learning_rate": 3.776767348669247e-07, "loss": 0.4152, "step": 6324 }, { "epoch": 2.663905657728485, "grad_norm": 0.4176674485206604, "learning_rate": 3.7674283244175767e-07, "loss": 0.4146, "step": 6325 }, { "epoch": 2.6643268285834623, "grad_norm": 0.4198687970638275, "learning_rate": 3.7581004089683903e-07, "loss": 0.3456, "step": 6326 }, { "epoch": 2.664747999438439, "grad_norm": 0.4510093927383423, "learning_rate": 3.748783604563005e-07, "loss": 0.4197, "step": 6327 }, { "epoch": 2.6651691702934155, "grad_norm": 0.37680912017822266, "learning_rate": 3.7394779134400985e-07, "loss": 0.3684, "step": 6328 }, { "epoch": 2.6655903411483926, "grad_norm": 0.44085535407066345, "learning_rate": 3.730183337835608e-07, "loss": 0.4184, "step": 6329 }, { "epoch": 2.6660115120033696, "grad_norm": 0.46403414011001587, "learning_rate": 3.7208998799828645e-07, "loss": 0.3919, "step": 6330 }, { "epoch": 2.6664326828583462, "grad_norm": 0.49402761459350586, "learning_rate": 3.7116275421125116e-07, "loss": 0.4335, "step": 6331 }, { "epoch": 2.666853853713323, "grad_norm": 0.4192706048488617, "learning_rate": 3.7023663264525003e-07, "loss": 0.4628, "step": 6332 }, { "epoch": 2.6672750245683, "grad_norm": 0.45376983284950256, "learning_rate": 3.693116235228134e-07, "loss": 0.4507, "step": 6333 }, { "epoch": 2.667696195423277, "grad_norm": 0.39510640501976013, "learning_rate": 3.6838772706620287e-07, "loss": 0.4127, "step": 6334 }, { "epoch": 2.6681173662782536, "grad_norm": 0.44351595640182495, "learning_rate": 3.6746494349741255e-07, "loss": 0.3739, "step": 6335 }, { "epoch": 2.66853853713323, "grad_norm": 0.39444613456726074, "learning_rate": 3.665432730381707e-07, "loss": 0.4299, "step": 6336 }, { "epoch": 2.6689597079882073, "grad_norm": 0.3862343430519104, "learning_rate": 3.656227159099368e-07, "loss": 0.3556, "step": 6337 }, { "epoch": 2.6693808788431843, "grad_norm": 0.4342425763607025, "learning_rate": 3.6470327233390223e-07, "loss": 0.3746, "step": 6338 }, { "epoch": 2.669802049698161, "grad_norm": 0.3800713121891022, "learning_rate": 3.6378494253099307e-07, "loss": 0.4075, "step": 6339 }, { "epoch": 2.6702232205531375, "grad_norm": 0.4209791421890259, "learning_rate": 3.62867726721865e-07, "loss": 0.412, "step": 6340 }, { "epoch": 2.6706443914081146, "grad_norm": 0.4880928695201874, "learning_rate": 3.6195162512690894e-07, "loss": 0.4493, "step": 6341 }, { "epoch": 2.6710655622630917, "grad_norm": 0.4126826226711273, "learning_rate": 3.6103663796624557e-07, "loss": 0.3561, "step": 6342 }, { "epoch": 2.6714867331180683, "grad_norm": 0.3994891047477722, "learning_rate": 3.601227654597278e-07, "loss": 0.4144, "step": 6343 }, { "epoch": 2.671907903973045, "grad_norm": 0.3976879119873047, "learning_rate": 3.5921000782694447e-07, "loss": 0.4168, "step": 6344 }, { "epoch": 2.672329074828022, "grad_norm": 0.47719258069992065, "learning_rate": 3.5829836528721127e-07, "loss": 0.4777, "step": 6345 }, { "epoch": 2.6727502456829986, "grad_norm": 0.3888223171234131, "learning_rate": 3.573878380595791e-07, "loss": 0.3561, "step": 6346 }, { "epoch": 2.6731714165379756, "grad_norm": 0.41435810923576355, "learning_rate": 3.5647842636283237e-07, "loss": 0.4264, "step": 6347 }, { "epoch": 2.6735925873929522, "grad_norm": 0.4626689553260803, "learning_rate": 3.5557013041548183e-07, "loss": 0.3927, "step": 6348 }, { "epoch": 2.6740137582479293, "grad_norm": 0.4047459065914154, "learning_rate": 3.5466295043577514e-07, "loss": 0.37, "step": 6349 }, { "epoch": 2.674434929102906, "grad_norm": 0.4588264226913452, "learning_rate": 3.537568866416913e-07, "loss": 0.4457, "step": 6350 }, { "epoch": 2.674856099957883, "grad_norm": 0.3885221779346466, "learning_rate": 3.528519392509383e-07, "loss": 0.3733, "step": 6351 }, { "epoch": 2.6752772708128596, "grad_norm": 0.42149290442466736, "learning_rate": 3.519481084809606e-07, "loss": 0.4049, "step": 6352 }, { "epoch": 2.6756984416678367, "grad_norm": 0.4303159713745117, "learning_rate": 3.510453945489273e-07, "loss": 0.3826, "step": 6353 }, { "epoch": 2.6761196125228133, "grad_norm": 0.43527156114578247, "learning_rate": 3.501437976717459e-07, "loss": 0.406, "step": 6354 }, { "epoch": 2.6765407833777903, "grad_norm": 0.3824620544910431, "learning_rate": 3.4924331806605315e-07, "loss": 0.3674, "step": 6355 }, { "epoch": 2.676961954232767, "grad_norm": 0.39238572120666504, "learning_rate": 3.483439559482155e-07, "loss": 0.3532, "step": 6356 }, { "epoch": 2.677383125087744, "grad_norm": 0.44003087282180786, "learning_rate": 3.474457115343344e-07, "loss": 0.4807, "step": 6357 }, { "epoch": 2.6778042959427206, "grad_norm": 0.34542742371559143, "learning_rate": 3.4654858504023946e-07, "loss": 0.3518, "step": 6358 }, { "epoch": 2.6782254667976977, "grad_norm": 0.41034355759620667, "learning_rate": 3.456525766814922e-07, "loss": 0.4012, "step": 6359 }, { "epoch": 2.6786466376526743, "grad_norm": 0.4039165675640106, "learning_rate": 3.447576866733887e-07, "loss": 0.4044, "step": 6360 }, { "epoch": 2.6790678085076514, "grad_norm": 0.4192562997341156, "learning_rate": 3.438639152309514e-07, "loss": 0.4297, "step": 6361 }, { "epoch": 2.679488979362628, "grad_norm": 0.40622809529304504, "learning_rate": 3.429712625689385e-07, "loss": 0.3868, "step": 6362 }, { "epoch": 2.679910150217605, "grad_norm": 0.39961549639701843, "learning_rate": 3.4207972890183627e-07, "loss": 0.349, "step": 6363 }, { "epoch": 2.6803313210725817, "grad_norm": 0.4126964807510376, "learning_rate": 3.4118931444386273e-07, "loss": 0.4383, "step": 6364 }, { "epoch": 2.6807524919275587, "grad_norm": 0.43316054344177246, "learning_rate": 3.403000194089695e-07, "loss": 0.4079, "step": 6365 }, { "epoch": 2.6811736627825353, "grad_norm": 0.4058232307434082, "learning_rate": 3.3941184401083404e-07, "loss": 0.3808, "step": 6366 }, { "epoch": 2.6815948336375124, "grad_norm": 0.38208913803100586, "learning_rate": 3.3852478846287115e-07, "loss": 0.3999, "step": 6367 }, { "epoch": 2.682016004492489, "grad_norm": 0.3633490800857544, "learning_rate": 3.3763885297822153e-07, "loss": 0.3761, "step": 6368 }, { "epoch": 2.682437175347466, "grad_norm": 0.3878664970397949, "learning_rate": 3.367540377697581e-07, "loss": 0.3719, "step": 6369 }, { "epoch": 2.6828583462024427, "grad_norm": 0.43482282757759094, "learning_rate": 3.3587034305008593e-07, "loss": 0.4615, "step": 6370 }, { "epoch": 2.6832795170574197, "grad_norm": 0.39087700843811035, "learning_rate": 3.349877690315406e-07, "loss": 0.3753, "step": 6371 }, { "epoch": 2.6837006879123964, "grad_norm": 0.4411132335662842, "learning_rate": 3.341063159261865e-07, "loss": 0.4117, "step": 6372 }, { "epoch": 2.6841218587673734, "grad_norm": 0.4314364790916443, "learning_rate": 3.332259839458196e-07, "loss": 0.4663, "step": 6373 }, { "epoch": 2.68454302962235, "grad_norm": 0.42107635736465454, "learning_rate": 3.3234677330196865e-07, "loss": 0.4355, "step": 6374 }, { "epoch": 2.684964200477327, "grad_norm": 0.4510934054851532, "learning_rate": 3.314686842058895e-07, "loss": 0.4382, "step": 6375 }, { "epoch": 2.6853853713323037, "grad_norm": 0.405705064535141, "learning_rate": 3.305917168685713e-07, "loss": 0.3517, "step": 6376 }, { "epoch": 2.6858065421872808, "grad_norm": 0.49402210116386414, "learning_rate": 3.29715871500732e-07, "loss": 0.428, "step": 6377 }, { "epoch": 2.6862277130422574, "grad_norm": 0.4792787432670593, "learning_rate": 3.2884114831281954e-07, "loss": 0.4674, "step": 6378 }, { "epoch": 2.6866488838972344, "grad_norm": 0.4331916570663452, "learning_rate": 3.2796754751501513e-07, "loss": 0.3787, "step": 6379 }, { "epoch": 2.687070054752211, "grad_norm": 0.40939411520957947, "learning_rate": 3.2709506931722646e-07, "loss": 0.3966, "step": 6380 }, { "epoch": 2.687491225607188, "grad_norm": 0.38100770115852356, "learning_rate": 3.2622371392909523e-07, "loss": 0.4051, "step": 6381 }, { "epoch": 2.6879123964621647, "grad_norm": 0.41996678709983826, "learning_rate": 3.253534815599901e-07, "loss": 0.4344, "step": 6382 }, { "epoch": 2.688333567317142, "grad_norm": 0.37426263093948364, "learning_rate": 3.2448437241901056e-07, "loss": 0.3565, "step": 6383 }, { "epoch": 2.6887547381721184, "grad_norm": 0.4527526795864105, "learning_rate": 3.236163867149894e-07, "loss": 0.4729, "step": 6384 }, { "epoch": 2.6891759090270955, "grad_norm": 0.4266412854194641, "learning_rate": 3.2274952465648433e-07, "loss": 0.4204, "step": 6385 }, { "epoch": 2.689597079882072, "grad_norm": 0.41881540417671204, "learning_rate": 3.218837864517871e-07, "loss": 0.418, "step": 6386 }, { "epoch": 2.690018250737049, "grad_norm": 0.411843866109848, "learning_rate": 3.21019172308919e-07, "loss": 0.403, "step": 6387 }, { "epoch": 2.6904394215920258, "grad_norm": 0.40866005420684814, "learning_rate": 3.2015568243562776e-07, "loss": 0.399, "step": 6388 }, { "epoch": 2.690860592447003, "grad_norm": 0.4372122287750244, "learning_rate": 3.1929331703939525e-07, "loss": 0.4053, "step": 6389 }, { "epoch": 2.6912817633019794, "grad_norm": 0.4213927388191223, "learning_rate": 3.184320763274312e-07, "loss": 0.4044, "step": 6390 }, { "epoch": 2.6917029341569565, "grad_norm": 0.42822548747062683, "learning_rate": 3.175719605066746e-07, "loss": 0.396, "step": 6391 }, { "epoch": 2.692124105011933, "grad_norm": 0.41838258504867554, "learning_rate": 3.1671296978379564e-07, "loss": 0.4205, "step": 6392 }, { "epoch": 2.69254527586691, "grad_norm": 0.3908573091030121, "learning_rate": 3.1585510436519307e-07, "loss": 0.4, "step": 6393 }, { "epoch": 2.692966446721887, "grad_norm": 0.3696969449520111, "learning_rate": 3.149983644569948e-07, "loss": 0.3606, "step": 6394 }, { "epoch": 2.693387617576864, "grad_norm": 0.40018653869628906, "learning_rate": 3.141427502650607e-07, "loss": 0.3644, "step": 6395 }, { "epoch": 2.6938087884318405, "grad_norm": 0.4957199990749359, "learning_rate": 3.132882619949762e-07, "loss": 0.4947, "step": 6396 }, { "epoch": 2.694229959286817, "grad_norm": 0.4610227942466736, "learning_rate": 3.1243489985206097e-07, "loss": 0.4676, "step": 6397 }, { "epoch": 2.694651130141794, "grad_norm": 0.43671777844429016, "learning_rate": 3.1158266404136053e-07, "loss": 0.3822, "step": 6398 }, { "epoch": 2.695072300996771, "grad_norm": 0.43433985114097595, "learning_rate": 3.107315547676493e-07, "loss": 0.4234, "step": 6399 }, { "epoch": 2.695493471851748, "grad_norm": 0.3895840048789978, "learning_rate": 3.0988157223543535e-07, "loss": 0.3826, "step": 6400 }, { "epoch": 2.6959146427067244, "grad_norm": 0.3983854353427887, "learning_rate": 3.090327166489504e-07, "loss": 0.3837, "step": 6401 }, { "epoch": 2.6963358135617015, "grad_norm": 0.44468697905540466, "learning_rate": 3.0818498821216125e-07, "loss": 0.4809, "step": 6402 }, { "epoch": 2.6967569844166785, "grad_norm": 0.39917388558387756, "learning_rate": 3.073383871287589e-07, "loss": 0.4108, "step": 6403 }, { "epoch": 2.697178155271655, "grad_norm": 0.3923493027687073, "learning_rate": 3.0649291360216437e-07, "loss": 0.3812, "step": 6404 }, { "epoch": 2.697599326126632, "grad_norm": 0.4544789791107178, "learning_rate": 3.0564856783553085e-07, "loss": 0.4755, "step": 6405 }, { "epoch": 2.698020496981609, "grad_norm": 0.40971142053604126, "learning_rate": 3.048053500317383e-07, "loss": 0.3703, "step": 6406 }, { "epoch": 2.698441667836586, "grad_norm": 0.42814943194389343, "learning_rate": 3.039632603933951e-07, "loss": 0.388, "step": 6407 }, { "epoch": 2.6988628386915625, "grad_norm": 0.4660402238368988, "learning_rate": 3.0312229912283884e-07, "loss": 0.4182, "step": 6408 }, { "epoch": 2.699284009546539, "grad_norm": 0.3876696228981018, "learning_rate": 3.0228246642213743e-07, "loss": 0.354, "step": 6409 }, { "epoch": 2.699705180401516, "grad_norm": 0.4093291461467743, "learning_rate": 3.0144376249308606e-07, "loss": 0.4528, "step": 6410 }, { "epoch": 2.7001263512564933, "grad_norm": 0.3802977502346039, "learning_rate": 3.006061875372096e-07, "loss": 0.3548, "step": 6411 }, { "epoch": 2.70054752211147, "grad_norm": 0.4147048592567444, "learning_rate": 2.9976974175576155e-07, "loss": 0.4072, "step": 6412 }, { "epoch": 2.7009686929664465, "grad_norm": 0.35636505484580994, "learning_rate": 2.989344253497223e-07, "loss": 0.3576, "step": 6413 }, { "epoch": 2.7013898638214235, "grad_norm": 0.444316029548645, "learning_rate": 2.9810023851980397e-07, "loss": 0.3874, "step": 6414 }, { "epoch": 2.7018110346764006, "grad_norm": 0.404773473739624, "learning_rate": 2.9726718146644517e-07, "loss": 0.4215, "step": 6415 }, { "epoch": 2.702232205531377, "grad_norm": 0.4419313967227936, "learning_rate": 2.964352543898136e-07, "loss": 0.4078, "step": 6416 }, { "epoch": 2.702653376386354, "grad_norm": 0.4078904986381531, "learning_rate": 2.9560445748980595e-07, "loss": 0.3792, "step": 6417 }, { "epoch": 2.703074547241331, "grad_norm": 0.3926849067211151, "learning_rate": 2.947747909660448e-07, "loss": 0.4398, "step": 6418 }, { "epoch": 2.703495718096308, "grad_norm": 0.41974180936813354, "learning_rate": 2.939462550178862e-07, "loss": 0.4048, "step": 6419 }, { "epoch": 2.7039168889512846, "grad_norm": 0.40227365493774414, "learning_rate": 2.9311884984440876e-07, "loss": 0.3828, "step": 6420 }, { "epoch": 2.704338059806261, "grad_norm": 0.44409507513046265, "learning_rate": 2.9229257564442283e-07, "loss": 0.4023, "step": 6421 }, { "epoch": 2.7047592306612382, "grad_norm": 0.4207645356655121, "learning_rate": 2.9146743261646905e-07, "loss": 0.3772, "step": 6422 }, { "epoch": 2.7051804015162153, "grad_norm": 0.3966204822063446, "learning_rate": 2.906434209588088e-07, "loss": 0.436, "step": 6423 }, { "epoch": 2.705601572371192, "grad_norm": 0.40010175108909607, "learning_rate": 2.898205408694399e-07, "loss": 0.3868, "step": 6424 }, { "epoch": 2.7060227432261685, "grad_norm": 0.44145235419273376, "learning_rate": 2.88998792546083e-07, "loss": 0.389, "step": 6425 }, { "epoch": 2.7064439140811456, "grad_norm": 0.44106680154800415, "learning_rate": 2.8817817618618846e-07, "loss": 0.4527, "step": 6426 }, { "epoch": 2.7068650849361227, "grad_norm": 0.4550645053386688, "learning_rate": 2.87358691986937e-07, "loss": 0.4526, "step": 6427 }, { "epoch": 2.7072862557910993, "grad_norm": 0.41512352228164673, "learning_rate": 2.8654034014523157e-07, "loss": 0.3864, "step": 6428 }, { "epoch": 2.707707426646076, "grad_norm": 0.42970332503318787, "learning_rate": 2.8572312085770824e-07, "loss": 0.4232, "step": 6429 }, { "epoch": 2.708128597501053, "grad_norm": 0.3725704252719879, "learning_rate": 2.8490703432073e-07, "loss": 0.3841, "step": 6430 }, { "epoch": 2.70854976835603, "grad_norm": 0.41868096590042114, "learning_rate": 2.8409208073038484e-07, "loss": 0.4435, "step": 6431 }, { "epoch": 2.7089709392110066, "grad_norm": 0.3975916802883148, "learning_rate": 2.832782602824924e-07, "loss": 0.3839, "step": 6432 }, { "epoch": 2.7093921100659832, "grad_norm": 0.3945305049419403, "learning_rate": 2.8246557317259725e-07, "loss": 0.4004, "step": 6433 }, { "epoch": 2.7098132809209603, "grad_norm": 0.3795965611934662, "learning_rate": 2.816540195959716e-07, "loss": 0.3721, "step": 6434 }, { "epoch": 2.7102344517759374, "grad_norm": 0.4135974049568176, "learning_rate": 2.8084359974761833e-07, "loss": 0.421, "step": 6435 }, { "epoch": 2.710655622630914, "grad_norm": 0.39062461256980896, "learning_rate": 2.800343138222639e-07, "loss": 0.4017, "step": 6436 }, { "epoch": 2.7110767934858906, "grad_norm": 0.3908005356788635, "learning_rate": 2.7922616201436604e-07, "loss": 0.3947, "step": 6437 }, { "epoch": 2.7114979643408677, "grad_norm": 0.3943858742713928, "learning_rate": 2.784191445181067e-07, "loss": 0.3787, "step": 6438 }, { "epoch": 2.7119191351958447, "grad_norm": 0.4459022879600525, "learning_rate": 2.776132615273969e-07, "loss": 0.4288, "step": 6439 }, { "epoch": 2.7123403060508213, "grad_norm": 0.4153537452220917, "learning_rate": 2.768085132358761e-07, "loss": 0.4258, "step": 6440 }, { "epoch": 2.712761476905798, "grad_norm": 0.39827555418014526, "learning_rate": 2.760048998369086e-07, "loss": 0.3552, "step": 6441 }, { "epoch": 2.713182647760775, "grad_norm": 0.4535510838031769, "learning_rate": 2.7520242152358767e-07, "loss": 0.4494, "step": 6442 }, { "epoch": 2.713603818615752, "grad_norm": 0.41464003920555115, "learning_rate": 2.744010784887341e-07, "loss": 0.3996, "step": 6443 }, { "epoch": 2.7140249894707287, "grad_norm": 0.46460822224617004, "learning_rate": 2.7360087092489385e-07, "loss": 0.3984, "step": 6444 }, { "epoch": 2.7144461603257053, "grad_norm": 0.4071795344352722, "learning_rate": 2.728017990243426e-07, "loss": 0.397, "step": 6445 }, { "epoch": 2.7148673311806824, "grad_norm": 0.43248218297958374, "learning_rate": 2.720038629790839e-07, "loss": 0.4379, "step": 6446 }, { "epoch": 2.715288502035659, "grad_norm": 0.43725842237472534, "learning_rate": 2.712070629808422e-07, "loss": 0.3562, "step": 6447 }, { "epoch": 2.715709672890636, "grad_norm": 0.4301241338253021, "learning_rate": 2.7041139922107607e-07, "loss": 0.3614, "step": 6448 }, { "epoch": 2.7161308437456126, "grad_norm": 0.4504960775375366, "learning_rate": 2.6961687189096863e-07, "loss": 0.4343, "step": 6449 }, { "epoch": 2.7165520146005897, "grad_norm": 0.42354050278663635, "learning_rate": 2.6882348118142834e-07, "loss": 0.3923, "step": 6450 }, { "epoch": 2.7169731854555663, "grad_norm": 0.43391573429107666, "learning_rate": 2.680312272830926e-07, "loss": 0.3973, "step": 6451 }, { "epoch": 2.7173943563105434, "grad_norm": 0.38265424966812134, "learning_rate": 2.6724011038632424e-07, "loss": 0.3999, "step": 6452 }, { "epoch": 2.71781552716552, "grad_norm": 0.43084803223609924, "learning_rate": 2.6645013068121397e-07, "loss": 0.4658, "step": 6453 }, { "epoch": 2.718236698020497, "grad_norm": 0.4090678095817566, "learning_rate": 2.6566128835757874e-07, "loss": 0.3694, "step": 6454 }, { "epoch": 2.7186578688754737, "grad_norm": 0.34903964400291443, "learning_rate": 2.648735836049615e-07, "loss": 0.3403, "step": 6455 }, { "epoch": 2.7190790397304507, "grad_norm": 0.4150046408176422, "learning_rate": 2.6408701661263416e-07, "loss": 0.4261, "step": 6456 }, { "epoch": 2.7195002105854273, "grad_norm": 0.41110309958457947, "learning_rate": 2.633015875695927e-07, "loss": 0.3626, "step": 6457 }, { "epoch": 2.7199213814404044, "grad_norm": 0.41740912199020386, "learning_rate": 2.6251729666456006e-07, "loss": 0.4092, "step": 6458 }, { "epoch": 2.720342552295381, "grad_norm": 0.46945837140083313, "learning_rate": 2.617341440859883e-07, "loss": 0.4652, "step": 6459 }, { "epoch": 2.720763723150358, "grad_norm": 0.4220731854438782, "learning_rate": 2.609521300220519e-07, "loss": 0.4198, "step": 6460 }, { "epoch": 2.7211848940053347, "grad_norm": 0.4270876348018646, "learning_rate": 2.601712546606555e-07, "loss": 0.4411, "step": 6461 }, { "epoch": 2.7216060648603118, "grad_norm": 0.4411364793777466, "learning_rate": 2.5939151818942743e-07, "loss": 0.4069, "step": 6462 }, { "epoch": 2.7220272357152884, "grad_norm": 0.39275363087654114, "learning_rate": 2.586129207957239e-07, "loss": 0.3878, "step": 6463 }, { "epoch": 2.7224484065702654, "grad_norm": 0.41342923045158386, "learning_rate": 2.578354626666263e-07, "loss": 0.4269, "step": 6464 }, { "epoch": 2.722869577425242, "grad_norm": 0.3970925211906433, "learning_rate": 2.570591439889447e-07, "loss": 0.3779, "step": 6465 }, { "epoch": 2.723290748280219, "grad_norm": 0.4171464741230011, "learning_rate": 2.5628396494921217e-07, "loss": 0.3699, "step": 6466 }, { "epoch": 2.7237119191351957, "grad_norm": 0.4068872928619385, "learning_rate": 2.5550992573368907e-07, "loss": 0.4084, "step": 6467 }, { "epoch": 2.724133089990173, "grad_norm": 0.37882399559020996, "learning_rate": 2.5473702652836383e-07, "loss": 0.3869, "step": 6468 }, { "epoch": 2.7245542608451494, "grad_norm": 0.42223459482192993, "learning_rate": 2.5396526751894747e-07, "loss": 0.4302, "step": 6469 }, { "epoch": 2.7249754317001265, "grad_norm": 0.3955354690551758, "learning_rate": 2.5319464889088043e-07, "loss": 0.3943, "step": 6470 }, { "epoch": 2.725396602555103, "grad_norm": 0.43118587136268616, "learning_rate": 2.5242517082932736e-07, "loss": 0.442, "step": 6471 }, { "epoch": 2.72581777341008, "grad_norm": 0.42750412225723267, "learning_rate": 2.516568335191777e-07, "loss": 0.42, "step": 6472 }, { "epoch": 2.7262389442650568, "grad_norm": 0.4304518699645996, "learning_rate": 2.508896371450503e-07, "loss": 0.4194, "step": 6473 }, { "epoch": 2.726660115120034, "grad_norm": 0.40355202555656433, "learning_rate": 2.501235818912856e-07, "loss": 0.3699, "step": 6474 }, { "epoch": 2.7270812859750104, "grad_norm": 0.46143317222595215, "learning_rate": 2.493586679419541e-07, "loss": 0.4388, "step": 6475 }, { "epoch": 2.7275024568299875, "grad_norm": 0.39858946204185486, "learning_rate": 2.485948954808493e-07, "loss": 0.3796, "step": 6476 }, { "epoch": 2.727923627684964, "grad_norm": 0.3849276304244995, "learning_rate": 2.4783226469148993e-07, "loss": 0.396, "step": 6477 }, { "epoch": 2.728344798539941, "grad_norm": 0.3949478268623352, "learning_rate": 2.4707077575712337e-07, "loss": 0.4182, "step": 6478 }, { "epoch": 2.728765969394918, "grad_norm": 0.377301424741745, "learning_rate": 2.463104288607188e-07, "loss": 0.4166, "step": 6479 }, { "epoch": 2.729187140249895, "grad_norm": 0.4091057777404785, "learning_rate": 2.45551224184975e-07, "loss": 0.3361, "step": 6480 }, { "epoch": 2.7296083111048715, "grad_norm": 0.4568578600883484, "learning_rate": 2.447931619123145e-07, "loss": 0.422, "step": 6481 }, { "epoch": 2.7300294819598485, "grad_norm": 0.3839542269706726, "learning_rate": 2.440362422248826e-07, "loss": 0.3525, "step": 6482 }, { "epoch": 2.730450652814825, "grad_norm": 0.4111737608909607, "learning_rate": 2.432804653045551e-07, "loss": 0.427, "step": 6483 }, { "epoch": 2.730871823669802, "grad_norm": 0.4737226366996765, "learning_rate": 2.4252583133292927e-07, "loss": 0.4131, "step": 6484 }, { "epoch": 2.731292994524779, "grad_norm": 0.39912742376327515, "learning_rate": 2.4177234049133024e-07, "loss": 0.3533, "step": 6485 }, { "epoch": 2.731714165379756, "grad_norm": 0.3981969952583313, "learning_rate": 2.4101999296080757e-07, "loss": 0.425, "step": 6486 }, { "epoch": 2.7321353362347325, "grad_norm": 0.39626285433769226, "learning_rate": 2.4026878892213445e-07, "loss": 0.4404, "step": 6487 }, { "epoch": 2.7325565070897095, "grad_norm": 0.36883941292762756, "learning_rate": 2.39518728555812e-07, "loss": 0.4093, "step": 6488 }, { "epoch": 2.732977677944686, "grad_norm": 0.3488835096359253, "learning_rate": 2.3876981204206563e-07, "loss": 0.4336, "step": 6489 }, { "epoch": 2.733398848799663, "grad_norm": 0.3966779112815857, "learning_rate": 2.3802203956084457e-07, "loss": 0.4247, "step": 6490 }, { "epoch": 2.73382001965464, "grad_norm": 0.4332793354988098, "learning_rate": 2.3727541129182574e-07, "loss": 0.3689, "step": 6491 }, { "epoch": 2.734241190509617, "grad_norm": 0.4258539080619812, "learning_rate": 2.3652992741440895e-07, "loss": 0.387, "step": 6492 }, { "epoch": 2.7346623613645935, "grad_norm": 0.4617418348789215, "learning_rate": 2.357855881077181e-07, "loss": 0.44, "step": 6493 }, { "epoch": 2.7350835322195706, "grad_norm": 0.39749130606651306, "learning_rate": 2.3504239355060632e-07, "loss": 0.4309, "step": 6494 }, { "epoch": 2.735504703074547, "grad_norm": 0.4138607382774353, "learning_rate": 2.3430034392164737e-07, "loss": 0.4027, "step": 6495 }, { "epoch": 2.7359258739295242, "grad_norm": 0.3834135830402374, "learning_rate": 2.335594393991425e-07, "loss": 0.3892, "step": 6496 }, { "epoch": 2.736347044784501, "grad_norm": 0.41927123069763184, "learning_rate": 2.3281968016111657e-07, "loss": 0.4478, "step": 6497 }, { "epoch": 2.7367682156394775, "grad_norm": 0.38356807827949524, "learning_rate": 2.3208106638531846e-07, "loss": 0.3583, "step": 6498 }, { "epoch": 2.7371893864944545, "grad_norm": 0.38943254947662354, "learning_rate": 2.3134359824922458e-07, "loss": 0.4134, "step": 6499 }, { "epoch": 2.7376105573494316, "grad_norm": 0.402778297662735, "learning_rate": 2.3060727593003375e-07, "loss": 0.3687, "step": 6500 }, { "epoch": 2.738031728204408, "grad_norm": 0.40783360600471497, "learning_rate": 2.2987209960466995e-07, "loss": 0.4267, "step": 6501 }, { "epoch": 2.738452899059385, "grad_norm": 0.3675727844238281, "learning_rate": 2.2913806944978244e-07, "loss": 0.356, "step": 6502 }, { "epoch": 2.738874069914362, "grad_norm": 0.41306471824645996, "learning_rate": 2.2840518564174408e-07, "loss": 0.4201, "step": 6503 }, { "epoch": 2.739295240769339, "grad_norm": 0.39247551560401917, "learning_rate": 2.2767344835665284e-07, "loss": 0.4184, "step": 6504 }, { "epoch": 2.7397164116243156, "grad_norm": 0.4367520213127136, "learning_rate": 2.2694285777033198e-07, "loss": 0.3884, "step": 6505 }, { "epoch": 2.740137582479292, "grad_norm": 0.44848179817199707, "learning_rate": 2.262134140583283e-07, "loss": 0.4396, "step": 6506 }, { "epoch": 2.7405587533342692, "grad_norm": 0.40834710001945496, "learning_rate": 2.254851173959116e-07, "loss": 0.3877, "step": 6507 }, { "epoch": 2.7409799241892463, "grad_norm": 0.4226018786430359, "learning_rate": 2.2475796795808026e-07, "loss": 0.3929, "step": 6508 }, { "epoch": 2.741401095044223, "grad_norm": 0.4211312234401703, "learning_rate": 2.2403196591955168e-07, "loss": 0.4649, "step": 6509 }, { "epoch": 2.7418222658991995, "grad_norm": 0.41637182235717773, "learning_rate": 2.2330711145477247e-07, "loss": 0.3852, "step": 6510 }, { "epoch": 2.7422434367541766, "grad_norm": 0.5247750878334045, "learning_rate": 2.2258340473790997e-07, "loss": 0.4125, "step": 6511 }, { "epoch": 2.7426646076091536, "grad_norm": 0.40080153942108154, "learning_rate": 2.2186084594285728e-07, "loss": 0.4036, "step": 6512 }, { "epoch": 2.7430857784641303, "grad_norm": 0.39470526576042175, "learning_rate": 2.2113943524323167e-07, "loss": 0.4151, "step": 6513 }, { "epoch": 2.743506949319107, "grad_norm": 0.3666439950466156, "learning_rate": 2.204191728123739e-07, "loss": 0.4144, "step": 6514 }, { "epoch": 2.743928120174084, "grad_norm": 0.3847425878047943, "learning_rate": 2.1970005882335056e-07, "loss": 0.4243, "step": 6515 }, { "epoch": 2.744349291029061, "grad_norm": 0.4026682674884796, "learning_rate": 2.189820934489495e-07, "loss": 0.3809, "step": 6516 }, { "epoch": 2.7447704618840376, "grad_norm": 0.3850785791873932, "learning_rate": 2.1826527686168442e-07, "loss": 0.4351, "step": 6517 }, { "epoch": 2.7451916327390142, "grad_norm": 0.4105014204978943, "learning_rate": 2.1754960923379364e-07, "loss": 0.4227, "step": 6518 }, { "epoch": 2.7456128035939913, "grad_norm": 0.3779100179672241, "learning_rate": 2.168350907372363e-07, "loss": 0.3561, "step": 6519 }, { "epoch": 2.7460339744489684, "grad_norm": 0.3821816146373749, "learning_rate": 2.1612172154369894e-07, "loss": 0.4239, "step": 6520 }, { "epoch": 2.746455145303945, "grad_norm": 0.3693183660507202, "learning_rate": 2.154095018245922e-07, "loss": 0.3695, "step": 6521 }, { "epoch": 2.7468763161589216, "grad_norm": 0.42029741406440735, "learning_rate": 2.1469843175104533e-07, "loss": 0.424, "step": 6522 }, { "epoch": 2.7472974870138986, "grad_norm": 0.3754366338253021, "learning_rate": 2.1398851149391664e-07, "loss": 0.3633, "step": 6523 }, { "epoch": 2.7477186578688757, "grad_norm": 0.4561646282672882, "learning_rate": 2.1327974122378693e-07, "loss": 0.4228, "step": 6524 }, { "epoch": 2.7481398287238523, "grad_norm": 0.41358959674835205, "learning_rate": 2.1257212111095882e-07, "loss": 0.4308, "step": 6525 }, { "epoch": 2.748560999578829, "grad_norm": 0.4355665147304535, "learning_rate": 2.1186565132546188e-07, "loss": 0.3704, "step": 6526 }, { "epoch": 2.748982170433806, "grad_norm": 0.45577186346054077, "learning_rate": 2.1116033203704534e-07, "loss": 0.4461, "step": 6527 }, { "epoch": 2.749403341288783, "grad_norm": 0.4575749635696411, "learning_rate": 2.1045616341518416e-07, "loss": 0.4422, "step": 6528 }, { "epoch": 2.7498245121437597, "grad_norm": 0.407699316740036, "learning_rate": 2.0975314562907746e-07, "loss": 0.3838, "step": 6529 }, { "epoch": 2.7502456829987363, "grad_norm": 0.4357109069824219, "learning_rate": 2.090512788476462e-07, "loss": 0.4206, "step": 6530 }, { "epoch": 2.7506668538537133, "grad_norm": 0.4156669080257416, "learning_rate": 2.0835056323953662e-07, "loss": 0.404, "step": 6531 }, { "epoch": 2.7510880247086904, "grad_norm": 0.44738149642944336, "learning_rate": 2.0765099897311624e-07, "loss": 0.4158, "step": 6532 }, { "epoch": 2.751509195563667, "grad_norm": 0.4069022536277771, "learning_rate": 2.0695258621647618e-07, "loss": 0.4284, "step": 6533 }, { "epoch": 2.7519303664186436, "grad_norm": 0.4208481013774872, "learning_rate": 2.0625532513743385e-07, "loss": 0.3847, "step": 6534 }, { "epoch": 2.7523515372736207, "grad_norm": 0.42954954504966736, "learning_rate": 2.0555921590352635e-07, "loss": 0.361, "step": 6535 }, { "epoch": 2.7527727081285978, "grad_norm": 0.3834517002105713, "learning_rate": 2.0486425868201598e-07, "loss": 0.4024, "step": 6536 }, { "epoch": 2.7531938789835744, "grad_norm": 0.3778125047683716, "learning_rate": 2.0417045363988753e-07, "loss": 0.3703, "step": 6537 }, { "epoch": 2.753615049838551, "grad_norm": 0.4488578736782074, "learning_rate": 2.0347780094384816e-07, "loss": 0.4077, "step": 6538 }, { "epoch": 2.754036220693528, "grad_norm": 0.46569207310676575, "learning_rate": 2.0278630076032978e-07, "loss": 0.4065, "step": 6539 }, { "epoch": 2.754457391548505, "grad_norm": 0.46019428968429565, "learning_rate": 2.0209595325548727e-07, "loss": 0.4228, "step": 6540 }, { "epoch": 2.7548785624034817, "grad_norm": 0.415069043636322, "learning_rate": 2.0140675859519731e-07, "loss": 0.4119, "step": 6541 }, { "epoch": 2.7552997332584583, "grad_norm": 0.4147756099700928, "learning_rate": 2.007187169450603e-07, "loss": 0.3596, "step": 6542 }, { "epoch": 2.7557209041134354, "grad_norm": 0.47891002893447876, "learning_rate": 2.000318284703989e-07, "loss": 0.4714, "step": 6543 }, { "epoch": 2.7561420749684125, "grad_norm": 0.4493665099143982, "learning_rate": 1.993460933362601e-07, "loss": 0.4561, "step": 6544 }, { "epoch": 2.756563245823389, "grad_norm": 0.38803306221961975, "learning_rate": 1.9866151170741366e-07, "loss": 0.3443, "step": 6545 }, { "epoch": 2.7569844166783657, "grad_norm": 0.4237319529056549, "learning_rate": 1.9797808374834915e-07, "loss": 0.4286, "step": 6546 }, { "epoch": 2.7574055875333428, "grad_norm": 0.40658557415008545, "learning_rate": 1.9729580962328244e-07, "loss": 0.3526, "step": 6547 }, { "epoch": 2.7578267583883194, "grad_norm": 0.4616638422012329, "learning_rate": 1.966146894961518e-07, "loss": 0.4736, "step": 6548 }, { "epoch": 2.7582479292432964, "grad_norm": 0.4159628748893738, "learning_rate": 1.959347235306158e-07, "loss": 0.418, "step": 6549 }, { "epoch": 2.758669100098273, "grad_norm": 0.35960355401039124, "learning_rate": 1.9525591189005876e-07, "loss": 0.3209, "step": 6550 }, { "epoch": 2.75909027095325, "grad_norm": 0.3944014608860016, "learning_rate": 1.9457825473758518e-07, "loss": 0.3796, "step": 6551 }, { "epoch": 2.7595114418082267, "grad_norm": 0.4230155646800995, "learning_rate": 1.9390175223602315e-07, "loss": 0.4122, "step": 6552 }, { "epoch": 2.7599326126632038, "grad_norm": 0.44689565896987915, "learning_rate": 1.9322640454792375e-07, "loss": 0.4154, "step": 6553 }, { "epoch": 2.7603537835181804, "grad_norm": 0.4101187586784363, "learning_rate": 1.9255221183555884e-07, "loss": 0.3968, "step": 6554 }, { "epoch": 2.7607749543731575, "grad_norm": 0.36815422773361206, "learning_rate": 1.9187917426092605e-07, "loss": 0.3767, "step": 6555 }, { "epoch": 2.761196125228134, "grad_norm": 0.4262729287147522, "learning_rate": 1.9120729198574271e-07, "loss": 0.3857, "step": 6556 }, { "epoch": 2.761617296083111, "grad_norm": 0.45259058475494385, "learning_rate": 1.9053656517144804e-07, "loss": 0.403, "step": 6557 }, { "epoch": 2.7620384669380877, "grad_norm": 0.4332301616668701, "learning_rate": 1.898669939792064e-07, "loss": 0.3869, "step": 6558 }, { "epoch": 2.762459637793065, "grad_norm": 0.42506465315818787, "learning_rate": 1.8919857856990188e-07, "loss": 0.4163, "step": 6559 }, { "epoch": 2.7628808086480414, "grad_norm": 0.40433719754219055, "learning_rate": 1.8853131910414268e-07, "loss": 0.3665, "step": 6560 }, { "epoch": 2.7633019795030185, "grad_norm": 0.38652631640434265, "learning_rate": 1.8786521574225837e-07, "loss": 0.4253, "step": 6561 }, { "epoch": 2.763723150357995, "grad_norm": 0.40546831488609314, "learning_rate": 1.8720026864429974e-07, "loss": 0.4044, "step": 6562 }, { "epoch": 2.764144321212972, "grad_norm": 0.4003847539424896, "learning_rate": 1.8653647797004236e-07, "loss": 0.3924, "step": 6563 }, { "epoch": 2.7645654920679488, "grad_norm": 0.40866169333457947, "learning_rate": 1.85873843878982e-07, "loss": 0.3975, "step": 6564 }, { "epoch": 2.764986662922926, "grad_norm": 0.43359240889549255, "learning_rate": 1.852123665303368e-07, "loss": 0.3986, "step": 6565 }, { "epoch": 2.7654078337779024, "grad_norm": 0.43905434012413025, "learning_rate": 1.8455204608304634e-07, "loss": 0.5087, "step": 6566 }, { "epoch": 2.7658290046328795, "grad_norm": 0.3970041573047638, "learning_rate": 1.8389288269577477e-07, "loss": 0.3613, "step": 6567 }, { "epoch": 2.766250175487856, "grad_norm": 0.3996036946773529, "learning_rate": 1.832348765269043e-07, "loss": 0.3941, "step": 6568 }, { "epoch": 2.766671346342833, "grad_norm": 0.38641610741615295, "learning_rate": 1.8257802773454348e-07, "loss": 0.4464, "step": 6569 }, { "epoch": 2.76709251719781, "grad_norm": 0.404867023229599, "learning_rate": 1.819223364765188e-07, "loss": 0.3467, "step": 6570 }, { "epoch": 2.767513688052787, "grad_norm": 0.42813804745674133, "learning_rate": 1.8126780291038037e-07, "loss": 0.45, "step": 6571 }, { "epoch": 2.7679348589077635, "grad_norm": 0.38640132546424866, "learning_rate": 1.8061442719340183e-07, "loss": 0.3805, "step": 6572 }, { "epoch": 2.7683560297627405, "grad_norm": 0.35536569356918335, "learning_rate": 1.7996220948257426e-07, "loss": 0.3742, "step": 6573 }, { "epoch": 2.768777200617717, "grad_norm": 0.45889413356781006, "learning_rate": 1.7931114993461562e-07, "loss": 0.4541, "step": 6574 }, { "epoch": 2.769198371472694, "grad_norm": 0.4035475552082062, "learning_rate": 1.786612487059619e-07, "loss": 0.3485, "step": 6575 }, { "epoch": 2.769619542327671, "grad_norm": 0.43843090534210205, "learning_rate": 1.7801250595277098e-07, "loss": 0.3768, "step": 6576 }, { "epoch": 2.770040713182648, "grad_norm": 0.34388595819473267, "learning_rate": 1.7736492183092535e-07, "loss": 0.3712, "step": 6577 }, { "epoch": 2.7704618840376245, "grad_norm": 0.4090667963027954, "learning_rate": 1.7671849649602502e-07, "loss": 0.3743, "step": 6578 }, { "epoch": 2.7708830548926016, "grad_norm": 0.3957475423812866, "learning_rate": 1.7607323010339462e-07, "loss": 0.3902, "step": 6579 }, { "epoch": 2.771304225747578, "grad_norm": 0.4225342571735382, "learning_rate": 1.7542912280808123e-07, "loss": 0.428, "step": 6580 }, { "epoch": 2.7717253966025552, "grad_norm": 0.3956073224544525, "learning_rate": 1.747861747648477e-07, "loss": 0.3888, "step": 6581 }, { "epoch": 2.772146567457532, "grad_norm": 0.421826034784317, "learning_rate": 1.741443861281844e-07, "loss": 0.4642, "step": 6582 }, { "epoch": 2.772567738312509, "grad_norm": 0.37773481011390686, "learning_rate": 1.7350375705230128e-07, "loss": 0.3802, "step": 6583 }, { "epoch": 2.7729889091674855, "grad_norm": 0.4311513900756836, "learning_rate": 1.728642876911274e-07, "loss": 0.3914, "step": 6584 }, { "epoch": 2.7734100800224626, "grad_norm": 0.4353085160255432, "learning_rate": 1.722259781983171e-07, "loss": 0.4353, "step": 6585 }, { "epoch": 2.773831250877439, "grad_norm": 0.3974725306034088, "learning_rate": 1.715888287272427e-07, "loss": 0.3747, "step": 6586 }, { "epoch": 2.7742524217324163, "grad_norm": 0.3946095108985901, "learning_rate": 1.7095283943099893e-07, "loss": 0.3996, "step": 6587 }, { "epoch": 2.774673592587393, "grad_norm": 0.42881396412849426, "learning_rate": 1.7031801046240303e-07, "loss": 0.3436, "step": 6588 }, { "epoch": 2.77509476344237, "grad_norm": 0.41651639342308044, "learning_rate": 1.6968434197399076e-07, "loss": 0.4032, "step": 6589 }, { "epoch": 2.7755159342973466, "grad_norm": 0.4185185134410858, "learning_rate": 1.690518341180214e-07, "loss": 0.3986, "step": 6590 }, { "epoch": 2.7759371051523236, "grad_norm": 0.4237203598022461, "learning_rate": 1.6842048704647452e-07, "loss": 0.4361, "step": 6591 }, { "epoch": 2.7763582760073002, "grad_norm": 0.4349639415740967, "learning_rate": 1.6779030091104987e-07, "loss": 0.4284, "step": 6592 }, { "epoch": 2.7767794468622773, "grad_norm": 0.374917209148407, "learning_rate": 1.6716127586317077e-07, "loss": 0.4231, "step": 6593 }, { "epoch": 2.777200617717254, "grad_norm": 0.3683454096317291, "learning_rate": 1.66533412053978e-07, "loss": 0.4015, "step": 6594 }, { "epoch": 2.777621788572231, "grad_norm": 0.36308661103248596, "learning_rate": 1.6590670963433642e-07, "loss": 0.4041, "step": 6595 }, { "epoch": 2.7780429594272076, "grad_norm": 0.3649599552154541, "learning_rate": 1.652811687548306e-07, "loss": 0.342, "step": 6596 }, { "epoch": 2.7784641302821846, "grad_norm": 0.40691879391670227, "learning_rate": 1.6465678956576525e-07, "loss": 0.4217, "step": 6597 }, { "epoch": 2.7788853011371613, "grad_norm": 0.367000013589859, "learning_rate": 1.6403357221716708e-07, "loss": 0.3816, "step": 6598 }, { "epoch": 2.779306471992138, "grad_norm": 0.41309526562690735, "learning_rate": 1.6341151685878465e-07, "loss": 0.4164, "step": 6599 }, { "epoch": 2.779727642847115, "grad_norm": 0.38424408435821533, "learning_rate": 1.6279062364008446e-07, "loss": 0.4088, "step": 6600 }, { "epoch": 2.780148813702092, "grad_norm": 0.3996952772140503, "learning_rate": 1.6217089271025556e-07, "loss": 0.404, "step": 6601 }, { "epoch": 2.7805699845570686, "grad_norm": 0.4171733856201172, "learning_rate": 1.6155232421820654e-07, "loss": 0.4031, "step": 6602 }, { "epoch": 2.7809911554120452, "grad_norm": 0.38291335105895996, "learning_rate": 1.6093491831256913e-07, "loss": 0.416, "step": 6603 }, { "epoch": 2.7814123262670223, "grad_norm": 0.4177902340888977, "learning_rate": 1.603186751416941e-07, "loss": 0.3743, "step": 6604 }, { "epoch": 2.7818334971219993, "grad_norm": 0.4049113392829895, "learning_rate": 1.5970359485365184e-07, "loss": 0.4063, "step": 6605 }, { "epoch": 2.782254667976976, "grad_norm": 0.398599773645401, "learning_rate": 1.5908967759623472e-07, "loss": 0.3996, "step": 6606 }, { "epoch": 2.7826758388319526, "grad_norm": 0.4348689019680023, "learning_rate": 1.5847692351695642e-07, "loss": 0.4278, "step": 6607 }, { "epoch": 2.7830970096869296, "grad_norm": 0.4216171205043793, "learning_rate": 1.5786533276304805e-07, "loss": 0.3756, "step": 6608 }, { "epoch": 2.7835181805419067, "grad_norm": 0.4228978753089905, "learning_rate": 1.5725490548146482e-07, "loss": 0.4487, "step": 6609 }, { "epoch": 2.7839393513968833, "grad_norm": 0.4522554874420166, "learning_rate": 1.5664564181888e-07, "loss": 0.4305, "step": 6610 }, { "epoch": 2.78436052225186, "grad_norm": 0.37529507279396057, "learning_rate": 1.560375419216875e-07, "loss": 0.381, "step": 6611 }, { "epoch": 2.784781693106837, "grad_norm": 0.4248584806919098, "learning_rate": 1.5543060593600334e-07, "loss": 0.4248, "step": 6612 }, { "epoch": 2.785202863961814, "grad_norm": 0.40731510519981384, "learning_rate": 1.548248340076619e-07, "loss": 0.4001, "step": 6613 }, { "epoch": 2.7856240348167907, "grad_norm": 0.45224833488464355, "learning_rate": 1.542202262822179e-07, "loss": 0.4709, "step": 6614 }, { "epoch": 2.7860452056717673, "grad_norm": 0.4651443660259247, "learning_rate": 1.5361678290494953e-07, "loss": 0.4559, "step": 6615 }, { "epoch": 2.7864663765267443, "grad_norm": 0.46739357709884644, "learning_rate": 1.5301450402084971e-07, "loss": 0.4259, "step": 6616 }, { "epoch": 2.7868875473817214, "grad_norm": 0.3983246684074402, "learning_rate": 1.5241338977463605e-07, "loss": 0.405, "step": 6617 }, { "epoch": 2.787308718236698, "grad_norm": 0.3954770565032959, "learning_rate": 1.518134403107441e-07, "loss": 0.3538, "step": 6618 }, { "epoch": 2.7877298890916746, "grad_norm": 0.40509361028671265, "learning_rate": 1.5121465577333016e-07, "loss": 0.4151, "step": 6619 }, { "epoch": 2.7881510599466517, "grad_norm": 0.43264061212539673, "learning_rate": 1.5061703630627311e-07, "loss": 0.4268, "step": 6620 }, { "epoch": 2.7885722308016287, "grad_norm": 0.4521815776824951, "learning_rate": 1.5002058205316583e-07, "loss": 0.4147, "step": 6621 }, { "epoch": 2.7889934016566054, "grad_norm": 0.3750031888484955, "learning_rate": 1.49425293157327e-07, "loss": 0.3962, "step": 6622 }, { "epoch": 2.789414572511582, "grad_norm": 0.3396344780921936, "learning_rate": 1.4883116976179334e-07, "loss": 0.4048, "step": 6623 }, { "epoch": 2.789835743366559, "grad_norm": 0.4059771001338959, "learning_rate": 1.4823821200931954e-07, "loss": 0.4137, "step": 6624 }, { "epoch": 2.790256914221536, "grad_norm": 0.42181676626205444, "learning_rate": 1.4764642004238438e-07, "loss": 0.392, "step": 6625 }, { "epoch": 2.7906780850765127, "grad_norm": 0.3567156195640564, "learning_rate": 1.4705579400318247e-07, "loss": 0.3984, "step": 6626 }, { "epoch": 2.7910992559314893, "grad_norm": 0.4055465757846832, "learning_rate": 1.464663340336303e-07, "loss": 0.4321, "step": 6627 }, { "epoch": 2.7915204267864664, "grad_norm": 0.42421379685401917, "learning_rate": 1.4587804027536455e-07, "loss": 0.4223, "step": 6628 }, { "epoch": 2.7919415976414435, "grad_norm": 0.42129001021385193, "learning_rate": 1.4529091286973994e-07, "loss": 0.3926, "step": 6629 }, { "epoch": 2.79236276849642, "grad_norm": 0.3982742130756378, "learning_rate": 1.447049519578325e-07, "loss": 0.4634, "step": 6630 }, { "epoch": 2.7927839393513967, "grad_norm": 0.4071826636791229, "learning_rate": 1.441201576804374e-07, "loss": 0.3637, "step": 6631 }, { "epoch": 2.7932051102063737, "grad_norm": 0.4480907917022705, "learning_rate": 1.435365301780689e-07, "loss": 0.4337, "step": 6632 }, { "epoch": 2.793626281061351, "grad_norm": 0.4114208221435547, "learning_rate": 1.4295406959096314e-07, "loss": 0.3647, "step": 6633 }, { "epoch": 2.7940474519163274, "grad_norm": 0.4356768727302551, "learning_rate": 1.4237277605907206e-07, "loss": 0.5014, "step": 6634 }, { "epoch": 2.794468622771304, "grad_norm": 0.39495012164115906, "learning_rate": 1.4179264972207118e-07, "loss": 0.3401, "step": 6635 }, { "epoch": 2.794889793626281, "grad_norm": 0.4734981656074524, "learning_rate": 1.4121369071935287e-07, "loss": 0.4825, "step": 6636 }, { "epoch": 2.795310964481258, "grad_norm": 0.39421966671943665, "learning_rate": 1.4063589919002974e-07, "loss": 0.3462, "step": 6637 }, { "epoch": 2.7957321353362348, "grad_norm": 0.39608702063560486, "learning_rate": 1.4005927527293462e-07, "loss": 0.3691, "step": 6638 }, { "epoch": 2.7961533061912114, "grad_norm": 0.40761277079582214, "learning_rate": 1.394838191066189e-07, "loss": 0.4218, "step": 6639 }, { "epoch": 2.7965744770461884, "grad_norm": 0.4238884150981903, "learning_rate": 1.389095308293542e-07, "loss": 0.4459, "step": 6640 }, { "epoch": 2.7969956479011655, "grad_norm": 0.4341757893562317, "learning_rate": 1.3833641057913017e-07, "loss": 0.3928, "step": 6641 }, { "epoch": 2.797416818756142, "grad_norm": 0.3892863690853119, "learning_rate": 1.3776445849365772e-07, "loss": 0.3637, "step": 6642 }, { "epoch": 2.7978379896111187, "grad_norm": 0.4416714012622833, "learning_rate": 1.3719367471036417e-07, "loss": 0.4222, "step": 6643 }, { "epoch": 2.798259160466096, "grad_norm": 0.4166301488876343, "learning_rate": 1.3662405936640032e-07, "loss": 0.4385, "step": 6644 }, { "epoch": 2.798680331321073, "grad_norm": 0.37978240847587585, "learning_rate": 1.360556125986323e-07, "loss": 0.3865, "step": 6645 }, { "epoch": 2.7991015021760495, "grad_norm": 0.39645785093307495, "learning_rate": 1.3548833454364641e-07, "loss": 0.4295, "step": 6646 }, { "epoch": 2.799522673031026, "grad_norm": 0.44355881214141846, "learning_rate": 1.3492222533775078e-07, "loss": 0.4855, "step": 6647 }, { "epoch": 2.799943843886003, "grad_norm": 0.37048888206481934, "learning_rate": 1.3435728511696833e-07, "loss": 0.3699, "step": 6648 }, { "epoch": 2.8003650147409798, "grad_norm": 0.4412519931793213, "learning_rate": 1.3379351401704542e-07, "loss": 0.4463, "step": 6649 }, { "epoch": 2.800786185595957, "grad_norm": 0.4853726923465729, "learning_rate": 1.332309121734443e-07, "loss": 0.4195, "step": 6650 }, { "epoch": 2.8012073564509334, "grad_norm": 0.3917861580848694, "learning_rate": 1.3266947972134737e-07, "loss": 0.3522, "step": 6651 }, { "epoch": 2.8016285273059105, "grad_norm": 0.3966332674026489, "learning_rate": 1.3210921679565725e-07, "loss": 0.4272, "step": 6652 }, { "epoch": 2.802049698160887, "grad_norm": 0.4062293767929077, "learning_rate": 1.315501235309924e-07, "loss": 0.4466, "step": 6653 }, { "epoch": 2.802470869015864, "grad_norm": 0.37912458181381226, "learning_rate": 1.3099220006169422e-07, "loss": 0.356, "step": 6654 }, { "epoch": 2.802892039870841, "grad_norm": 0.41648849844932556, "learning_rate": 1.304354465218205e-07, "loss": 0.4036, "step": 6655 }, { "epoch": 2.803313210725818, "grad_norm": 0.4265863597393036, "learning_rate": 1.2987986304514755e-07, "loss": 0.4611, "step": 6656 }, { "epoch": 2.8037343815807945, "grad_norm": 0.4344833195209503, "learning_rate": 1.2932544976517191e-07, "loss": 0.3966, "step": 6657 }, { "epoch": 2.8041555524357715, "grad_norm": 0.40773123502731323, "learning_rate": 1.2877220681510927e-07, "loss": 0.3838, "step": 6658 }, { "epoch": 2.804576723290748, "grad_norm": 0.417068749666214, "learning_rate": 1.282201343278927e-07, "loss": 0.402, "step": 6659 }, { "epoch": 2.804997894145725, "grad_norm": 0.44960302114486694, "learning_rate": 1.2766923243617445e-07, "loss": 0.4025, "step": 6660 }, { "epoch": 2.805419065000702, "grad_norm": 0.45284897089004517, "learning_rate": 1.2711950127232586e-07, "loss": 0.4091, "step": 6661 }, { "epoch": 2.805840235855679, "grad_norm": 0.34989383816719055, "learning_rate": 1.265709409684368e-07, "loss": 0.3329, "step": 6662 }, { "epoch": 2.8062614067106555, "grad_norm": 0.41681739687919617, "learning_rate": 1.260235516563163e-07, "loss": 0.3996, "step": 6663 }, { "epoch": 2.8066825775656326, "grad_norm": 0.3609423339366913, "learning_rate": 1.2547733346749135e-07, "loss": 0.3888, "step": 6664 }, { "epoch": 2.807103748420609, "grad_norm": 0.45533984899520874, "learning_rate": 1.24932286533207e-07, "loss": 0.5112, "step": 6665 }, { "epoch": 2.8075249192755862, "grad_norm": 0.42465344071388245, "learning_rate": 1.2438841098442843e-07, "loss": 0.3556, "step": 6666 }, { "epoch": 2.807946090130563, "grad_norm": 0.4351224899291992, "learning_rate": 1.2384570695183784e-07, "loss": 0.4746, "step": 6667 }, { "epoch": 2.80836726098554, "grad_norm": 0.453287810087204, "learning_rate": 1.2330417456583754e-07, "loss": 0.41, "step": 6668 }, { "epoch": 2.8087884318405165, "grad_norm": 0.43414485454559326, "learning_rate": 1.227638139565468e-07, "loss": 0.4191, "step": 6669 }, { "epoch": 2.8092096026954936, "grad_norm": 0.4232458472251892, "learning_rate": 1.222246252538034e-07, "loss": 0.3837, "step": 6670 }, { "epoch": 2.80963077355047, "grad_norm": 0.4520367383956909, "learning_rate": 1.2168660858716541e-07, "loss": 0.4388, "step": 6671 }, { "epoch": 2.8100519444054473, "grad_norm": 0.3580833077430725, "learning_rate": 1.2114976408590662e-07, "loss": 0.3567, "step": 6672 }, { "epoch": 2.810473115260424, "grad_norm": 0.4421291649341583, "learning_rate": 1.2061409187902108e-07, "loss": 0.4361, "step": 6673 }, { "epoch": 2.810894286115401, "grad_norm": 0.3669161796569824, "learning_rate": 1.200795920952219e-07, "loss": 0.3316, "step": 6674 }, { "epoch": 2.8113154569703775, "grad_norm": 0.4225531816482544, "learning_rate": 1.1954626486293642e-07, "loss": 0.4152, "step": 6675 }, { "epoch": 2.8117366278253546, "grad_norm": 0.4754944443702698, "learning_rate": 1.1901411031031485e-07, "loss": 0.4591, "step": 6676 }, { "epoch": 2.8121577986803312, "grad_norm": 0.47541695833206177, "learning_rate": 1.1848312856522326e-07, "loss": 0.4779, "step": 6677 }, { "epoch": 2.8125789695353083, "grad_norm": 0.37889039516448975, "learning_rate": 1.1795331975524571e-07, "loss": 0.3793, "step": 6678 }, { "epoch": 2.813000140390285, "grad_norm": 0.3942260444164276, "learning_rate": 1.1742468400768758e-07, "loss": 0.46, "step": 6679 }, { "epoch": 2.813421311245262, "grad_norm": 0.41198644042015076, "learning_rate": 1.1689722144956672e-07, "loss": 0.3961, "step": 6680 }, { "epoch": 2.8138424821002386, "grad_norm": 0.3896900415420532, "learning_rate": 1.1637093220762397e-07, "loss": 0.3947, "step": 6681 }, { "epoch": 2.8142636529552156, "grad_norm": 0.4754257798194885, "learning_rate": 1.1584581640831705e-07, "loss": 0.4669, "step": 6682 }, { "epoch": 2.8146848238101922, "grad_norm": 0.41342347860336304, "learning_rate": 1.1532187417781948e-07, "loss": 0.3806, "step": 6683 }, { "epoch": 2.8151059946651693, "grad_norm": 0.39042583107948303, "learning_rate": 1.1479910564202668e-07, "loss": 0.39, "step": 6684 }, { "epoch": 2.815527165520146, "grad_norm": 0.40641510486602783, "learning_rate": 1.142775109265487e-07, "loss": 0.3726, "step": 6685 }, { "epoch": 2.815948336375123, "grad_norm": 0.4027915894985199, "learning_rate": 1.1375709015671421e-07, "loss": 0.399, "step": 6686 }, { "epoch": 2.8163695072300996, "grad_norm": 0.42688918113708496, "learning_rate": 1.1323784345757205e-07, "loss": 0.4046, "step": 6687 }, { "epoch": 2.8167906780850767, "grad_norm": 0.44450876116752625, "learning_rate": 1.1271977095388519e-07, "loss": 0.4032, "step": 6688 }, { "epoch": 2.8172118489400533, "grad_norm": 0.37541449069976807, "learning_rate": 1.1220287277013853e-07, "loss": 0.3788, "step": 6689 }, { "epoch": 2.8176330197950303, "grad_norm": 0.39258480072021484, "learning_rate": 1.1168714903053212e-07, "loss": 0.4161, "step": 6690 }, { "epoch": 2.818054190650007, "grad_norm": 0.40371355414390564, "learning_rate": 1.1117259985898354e-07, "loss": 0.43, "step": 6691 }, { "epoch": 2.818475361504984, "grad_norm": 0.4522882103919983, "learning_rate": 1.1065922537913054e-07, "loss": 0.3862, "step": 6692 }, { "epoch": 2.8188965323599606, "grad_norm": 0.42305493354797363, "learning_rate": 1.1014702571432612e-07, "loss": 0.4001, "step": 6693 }, { "epoch": 2.8193177032149377, "grad_norm": 0.37763556838035583, "learning_rate": 1.0963600098764293e-07, "loss": 0.394, "step": 6694 }, { "epoch": 2.8197388740699143, "grad_norm": 0.4011262357234955, "learning_rate": 1.0912615132186999e-07, "loss": 0.3997, "step": 6695 }, { "epoch": 2.8201600449248914, "grad_norm": 0.3781244456768036, "learning_rate": 1.0861747683951374e-07, "loss": 0.4139, "step": 6696 }, { "epoch": 2.820581215779868, "grad_norm": 0.37555915117263794, "learning_rate": 1.0810997766279974e-07, "loss": 0.3692, "step": 6697 }, { "epoch": 2.821002386634845, "grad_norm": 0.39332449436187744, "learning_rate": 1.07603653913671e-07, "loss": 0.4423, "step": 6698 }, { "epoch": 2.8214235574898217, "grad_norm": 0.37274637818336487, "learning_rate": 1.0709850571378633e-07, "loss": 0.3758, "step": 6699 }, { "epoch": 2.8218447283447983, "grad_norm": 0.38613393902778625, "learning_rate": 1.0659453318452307e-07, "loss": 0.416, "step": 6700 }, { "epoch": 2.8222658991997753, "grad_norm": 0.4164797067642212, "learning_rate": 1.0609173644697712e-07, "loss": 0.4111, "step": 6701 }, { "epoch": 2.8226870700547524, "grad_norm": 0.3740350604057312, "learning_rate": 1.055901156219602e-07, "loss": 0.3472, "step": 6702 }, { "epoch": 2.823108240909729, "grad_norm": 0.43432673811912537, "learning_rate": 1.0508967083000255e-07, "loss": 0.4429, "step": 6703 }, { "epoch": 2.8235294117647056, "grad_norm": 0.43136534094810486, "learning_rate": 1.0459040219135131e-07, "loss": 0.4037, "step": 6704 }, { "epoch": 2.8239505826196827, "grad_norm": 0.38842034339904785, "learning_rate": 1.0409230982597052e-07, "loss": 0.4219, "step": 6705 }, { "epoch": 2.8243717534746597, "grad_norm": 0.37902185320854187, "learning_rate": 1.035953938535439e-07, "loss": 0.3868, "step": 6706 }, { "epoch": 2.8247929243296364, "grad_norm": 0.42511671781539917, "learning_rate": 1.030996543934687e-07, "loss": 0.4163, "step": 6707 }, { "epoch": 2.825214095184613, "grad_norm": 0.40653571486473083, "learning_rate": 1.0260509156486409e-07, "loss": 0.3888, "step": 6708 }, { "epoch": 2.82563526603959, "grad_norm": 0.4117456376552582, "learning_rate": 1.0211170548656224e-07, "loss": 0.4083, "step": 6709 }, { "epoch": 2.826056436894567, "grad_norm": 0.38643401861190796, "learning_rate": 1.0161949627711443e-07, "loss": 0.376, "step": 6710 }, { "epoch": 2.8264776077495437, "grad_norm": 0.475579172372818, "learning_rate": 1.0112846405479049e-07, "loss": 0.4456, "step": 6711 }, { "epoch": 2.8268987786045203, "grad_norm": 0.3602595329284668, "learning_rate": 1.0063860893757438e-07, "loss": 0.3271, "step": 6712 }, { "epoch": 2.8273199494594974, "grad_norm": 0.4650194048881531, "learning_rate": 1.0014993104316972e-07, "loss": 0.4662, "step": 6713 }, { "epoch": 2.8277411203144744, "grad_norm": 0.3981657922267914, "learning_rate": 9.966243048899704e-08, "loss": 0.3846, "step": 6714 }, { "epoch": 2.828162291169451, "grad_norm": 0.4330905079841614, "learning_rate": 9.917610739219263e-08, "loss": 0.413, "step": 6715 }, { "epoch": 2.8285834620244277, "grad_norm": 0.4407326877117157, "learning_rate": 9.869096186961025e-08, "loss": 0.4767, "step": 6716 }, { "epoch": 2.8290046328794047, "grad_norm": 0.3805501163005829, "learning_rate": 9.820699403782218e-08, "loss": 0.378, "step": 6717 }, { "epoch": 2.829425803734382, "grad_norm": 0.41698914766311646, "learning_rate": 9.7724204013116e-08, "loss": 0.4509, "step": 6718 }, { "epoch": 2.8298469745893584, "grad_norm": 0.40834352374076843, "learning_rate": 9.724259191149776e-08, "loss": 0.3688, "step": 6719 }, { "epoch": 2.830268145444335, "grad_norm": 0.3931202292442322, "learning_rate": 9.67621578486877e-08, "loss": 0.3915, "step": 6720 }, { "epoch": 2.830689316299312, "grad_norm": 0.43035075068473816, "learning_rate": 9.628290194012624e-08, "loss": 0.4096, "step": 6721 }, { "epoch": 2.831110487154289, "grad_norm": 0.4522951543331146, "learning_rate": 9.58048243009696e-08, "loss": 0.4313, "step": 6722 }, { "epoch": 2.8315316580092658, "grad_norm": 0.3876549303531647, "learning_rate": 9.532792504609034e-08, "loss": 0.3765, "step": 6723 }, { "epoch": 2.8319528288642424, "grad_norm": 0.38687315583229065, "learning_rate": 9.485220429007847e-08, "loss": 0.3737, "step": 6724 }, { "epoch": 2.8323739997192194, "grad_norm": 0.43611258268356323, "learning_rate": 9.437766214724031e-08, "loss": 0.4451, "step": 6725 }, { "epoch": 2.8327951705741965, "grad_norm": 0.3945050835609436, "learning_rate": 9.390429873159913e-08, "loss": 0.4028, "step": 6726 }, { "epoch": 2.833216341429173, "grad_norm": 0.3868173062801361, "learning_rate": 9.343211415689557e-08, "loss": 0.3492, "step": 6727 }, { "epoch": 2.8336375122841497, "grad_norm": 0.4328043758869171, "learning_rate": 9.296110853658558e-08, "loss": 0.4416, "step": 6728 }, { "epoch": 2.834058683139127, "grad_norm": 0.4611963927745819, "learning_rate": 9.249128198384416e-08, "loss": 0.3917, "step": 6729 }, { "epoch": 2.834479853994104, "grad_norm": 0.42206332087516785, "learning_rate": 9.202263461156103e-08, "loss": 0.4107, "step": 6730 }, { "epoch": 2.8349010248490805, "grad_norm": 0.41502809524536133, "learning_rate": 9.155516653234276e-08, "loss": 0.3921, "step": 6731 }, { "epoch": 2.835322195704057, "grad_norm": 0.45569413900375366, "learning_rate": 9.108887785851339e-08, "loss": 0.4115, "step": 6732 }, { "epoch": 2.835743366559034, "grad_norm": 0.41896986961364746, "learning_rate": 9.062376870211331e-08, "loss": 0.4487, "step": 6733 }, { "epoch": 2.836164537414011, "grad_norm": 0.40646564960479736, "learning_rate": 9.015983917489923e-08, "loss": 0.3752, "step": 6734 }, { "epoch": 2.836585708268988, "grad_norm": 0.4085836410522461, "learning_rate": 8.969708938834476e-08, "loss": 0.4373, "step": 6735 }, { "epoch": 2.8370068791239644, "grad_norm": 0.39624443650245667, "learning_rate": 8.923551945363873e-08, "loss": 0.3911, "step": 6736 }, { "epoch": 2.8374280499789415, "grad_norm": 0.39336952567100525, "learning_rate": 8.87751294816891e-08, "loss": 0.3628, "step": 6737 }, { "epoch": 2.8378492208339186, "grad_norm": 0.41675519943237305, "learning_rate": 8.831591958311792e-08, "loss": 0.4362, "step": 6738 }, { "epoch": 2.838270391688895, "grad_norm": 0.3960826098918915, "learning_rate": 8.785788986826471e-08, "loss": 0.3763, "step": 6739 }, { "epoch": 2.838691562543872, "grad_norm": 0.38821613788604736, "learning_rate": 8.740104044718479e-08, "loss": 0.3931, "step": 6740 }, { "epoch": 2.839112733398849, "grad_norm": 0.39336782693862915, "learning_rate": 8.694537142965143e-08, "loss": 0.4216, "step": 6741 }, { "epoch": 2.839533904253826, "grad_norm": 0.4575299322605133, "learning_rate": 8.649088292515151e-08, "loss": 0.4628, "step": 6742 }, { "epoch": 2.8399550751088025, "grad_norm": 0.42618581652641296, "learning_rate": 8.603757504289212e-08, "loss": 0.3915, "step": 6743 }, { "epoch": 2.840376245963779, "grad_norm": 0.38987454771995544, "learning_rate": 8.558544789179279e-08, "loss": 0.3766, "step": 6744 }, { "epoch": 2.840797416818756, "grad_norm": 0.4632019102573395, "learning_rate": 8.513450158049109e-08, "loss": 0.4098, "step": 6745 }, { "epoch": 2.8412185876737333, "grad_norm": 0.41671836376190186, "learning_rate": 8.468473621734197e-08, "loss": 0.4015, "step": 6746 }, { "epoch": 2.84163975852871, "grad_norm": 0.46462154388427734, "learning_rate": 8.4236151910414e-08, "loss": 0.4563, "step": 6747 }, { "epoch": 2.8420609293836865, "grad_norm": 0.36392292380332947, "learning_rate": 8.378874876749433e-08, "loss": 0.3558, "step": 6748 }, { "epoch": 2.8424821002386635, "grad_norm": 0.3987826108932495, "learning_rate": 8.33425268960858e-08, "loss": 0.3921, "step": 6749 }, { "epoch": 2.84290327109364, "grad_norm": 0.3959667682647705, "learning_rate": 8.289748640340544e-08, "loss": 0.4115, "step": 6750 }, { "epoch": 2.843324441948617, "grad_norm": 0.40626683831214905, "learning_rate": 8.24536273963894e-08, "loss": 0.4015, "step": 6751 }, { "epoch": 2.843745612803594, "grad_norm": 0.3915427029132843, "learning_rate": 8.201094998168734e-08, "loss": 0.3571, "step": 6752 }, { "epoch": 2.844166783658571, "grad_norm": 0.3797905743122101, "learning_rate": 8.156945426566699e-08, "loss": 0.3616, "step": 6753 }, { "epoch": 2.8445879545135475, "grad_norm": 0.4005506932735443, "learning_rate": 8.112914035441177e-08, "loss": 0.3891, "step": 6754 }, { "epoch": 2.8450091253685246, "grad_norm": 0.4245407283306122, "learning_rate": 8.069000835371932e-08, "loss": 0.4863, "step": 6755 }, { "epoch": 2.845430296223501, "grad_norm": 0.34989726543426514, "learning_rate": 8.025205836910522e-08, "loss": 0.3556, "step": 6756 }, { "epoch": 2.8458514670784782, "grad_norm": 0.4198167622089386, "learning_rate": 7.98152905058014e-08, "loss": 0.3961, "step": 6757 }, { "epoch": 2.846272637933455, "grad_norm": 0.4201446771621704, "learning_rate": 7.93797048687539e-08, "loss": 0.4224, "step": 6758 }, { "epoch": 2.846693808788432, "grad_norm": 0.3959694802761078, "learning_rate": 7.89453015626257e-08, "loss": 0.4036, "step": 6759 }, { "epoch": 2.8471149796434085, "grad_norm": 0.4053642153739929, "learning_rate": 7.85120806917955e-08, "loss": 0.3944, "step": 6760 }, { "epoch": 2.8475361504983856, "grad_norm": 0.3919089734554291, "learning_rate": 7.808004236035838e-08, "loss": 0.4093, "step": 6761 }, { "epoch": 2.847957321353362, "grad_norm": 0.3821040391921997, "learning_rate": 7.764918667212462e-08, "loss": 0.4058, "step": 6762 }, { "epoch": 2.8483784922083393, "grad_norm": 0.4510727524757385, "learning_rate": 7.721951373062086e-08, "loss": 0.4258, "step": 6763 }, { "epoch": 2.848799663063316, "grad_norm": 0.42524856328964233, "learning_rate": 7.679102363908842e-08, "loss": 0.4542, "step": 6764 }, { "epoch": 2.849220833918293, "grad_norm": 0.3542293310165405, "learning_rate": 7.636371650048658e-08, "loss": 0.3163, "step": 6765 }, { "epoch": 2.8496420047732696, "grad_norm": 0.42312759160995483, "learning_rate": 7.593759241748821e-08, "loss": 0.4966, "step": 6766 }, { "epoch": 2.8500631756282466, "grad_norm": 0.49227821826934814, "learning_rate": 7.55126514924831e-08, "loss": 0.4771, "step": 6767 }, { "epoch": 2.8504843464832232, "grad_norm": 0.40465739369392395, "learning_rate": 7.508889382757623e-08, "loss": 0.3185, "step": 6768 }, { "epoch": 2.8509055173382003, "grad_norm": 0.42056554555892944, "learning_rate": 7.466631952458836e-08, "loss": 0.4631, "step": 6769 }, { "epoch": 2.851326688193177, "grad_norm": 0.4335422217845917, "learning_rate": 7.424492868505661e-08, "loss": 0.3895, "step": 6770 }, { "epoch": 2.851747859048154, "grad_norm": 0.4508577585220337, "learning_rate": 7.382472141023223e-08, "loss": 0.4593, "step": 6771 }, { "epoch": 2.8521690299031306, "grad_norm": 0.42466050386428833, "learning_rate": 7.34056978010833e-08, "loss": 0.4248, "step": 6772 }, { "epoch": 2.8525902007581077, "grad_norm": 0.35325634479522705, "learning_rate": 7.298785795829433e-08, "loss": 0.3965, "step": 6773 }, { "epoch": 2.8530113716130843, "grad_norm": 0.36049458384513855, "learning_rate": 7.257120198226219e-08, "loss": 0.408, "step": 6774 }, { "epoch": 2.8534325424680613, "grad_norm": 0.39100462198257446, "learning_rate": 7.215572997310293e-08, "loss": 0.382, "step": 6775 }, { "epoch": 2.853853713323038, "grad_norm": 0.4344487488269806, "learning_rate": 7.174144203064615e-08, "loss": 0.4024, "step": 6776 }, { "epoch": 2.854274884178015, "grad_norm": 0.3996311128139496, "learning_rate": 7.132833825443664e-08, "loss": 0.3902, "step": 6777 }, { "epoch": 2.8546960550329916, "grad_norm": 0.40474748611450195, "learning_rate": 7.091641874373723e-08, "loss": 0.4055, "step": 6778 }, { "epoch": 2.8551172258879687, "grad_norm": 0.41047587990760803, "learning_rate": 7.050568359752152e-08, "loss": 0.4414, "step": 6779 }, { "epoch": 2.8555383967429453, "grad_norm": 0.38260743021965027, "learning_rate": 7.00961329144828e-08, "loss": 0.3619, "step": 6780 }, { "epoch": 2.8559595675979224, "grad_norm": 0.4196651577949524, "learning_rate": 6.968776679302902e-08, "loss": 0.4259, "step": 6781 }, { "epoch": 2.856380738452899, "grad_norm": 0.39627310633659363, "learning_rate": 6.928058533128112e-08, "loss": 0.4184, "step": 6782 }, { "epoch": 2.856801909307876, "grad_norm": 0.43654894828796387, "learning_rate": 6.887458862707864e-08, "loss": 0.3907, "step": 6783 }, { "epoch": 2.8572230801628526, "grad_norm": 0.4095674157142639, "learning_rate": 6.84697767779735e-08, "loss": 0.4124, "step": 6784 }, { "epoch": 2.8576442510178297, "grad_norm": 0.46019411087036133, "learning_rate": 6.806614988123461e-08, "loss": 0.4335, "step": 6785 }, { "epoch": 2.8580654218728063, "grad_norm": 0.4688440263271332, "learning_rate": 6.766370803384604e-08, "loss": 0.4265, "step": 6786 }, { "epoch": 2.8584865927277834, "grad_norm": 0.37978053092956543, "learning_rate": 6.726245133250653e-08, "loss": 0.3499, "step": 6787 }, { "epoch": 2.85890776358276, "grad_norm": 0.39372968673706055, "learning_rate": 6.68623798736312e-08, "loss": 0.4265, "step": 6788 }, { "epoch": 2.859328934437737, "grad_norm": 0.3735487461090088, "learning_rate": 6.64634937533487e-08, "loss": 0.3942, "step": 6789 }, { "epoch": 2.8597501052927137, "grad_norm": 0.4314059019088745, "learning_rate": 6.606579306750405e-08, "loss": 0.4402, "step": 6790 }, { "epoch": 2.8601712761476907, "grad_norm": 0.4437861740589142, "learning_rate": 6.566927791165689e-08, "loss": 0.435, "step": 6791 }, { "epoch": 2.8605924470026673, "grad_norm": 0.44466209411621094, "learning_rate": 6.527394838108214e-08, "loss": 0.3895, "step": 6792 }, { "epoch": 2.8610136178576444, "grad_norm": 0.42645785212516785, "learning_rate": 6.487980457077103e-08, "loss": 0.4142, "step": 6793 }, { "epoch": 2.861434788712621, "grad_norm": 0.42601916193962097, "learning_rate": 6.448684657542781e-08, "loss": 0.4019, "step": 6794 }, { "epoch": 2.861855959567598, "grad_norm": 0.44861799478530884, "learning_rate": 6.409507448947195e-08, "loss": 0.4879, "step": 6795 }, { "epoch": 2.8622771304225747, "grad_norm": 0.4101201295852661, "learning_rate": 6.370448840704036e-08, "loss": 0.378, "step": 6796 }, { "epoch": 2.8626983012775513, "grad_norm": 0.3968644142150879, "learning_rate": 6.331508842198297e-08, "loss": 0.3578, "step": 6797 }, { "epoch": 2.8631194721325284, "grad_norm": 0.4225582778453827, "learning_rate": 6.292687462786496e-08, "loss": 0.422, "step": 6798 }, { "epoch": 2.8635406429875054, "grad_norm": 0.4005856513977051, "learning_rate": 6.253984711796612e-08, "loss": 0.3303, "step": 6799 }, { "epoch": 2.863961813842482, "grad_norm": 0.3764699101448059, "learning_rate": 6.215400598528265e-08, "loss": 0.4404, "step": 6800 }, { "epoch": 2.8643829846974587, "grad_norm": 0.41597598791122437, "learning_rate": 6.176935132252426e-08, "loss": 0.3966, "step": 6801 }, { "epoch": 2.8648041555524357, "grad_norm": 0.4852552115917206, "learning_rate": 6.138588322211647e-08, "loss": 0.4693, "step": 6802 }, { "epoch": 2.865225326407413, "grad_norm": 0.3934175968170166, "learning_rate": 6.100360177619946e-08, "loss": 0.3345, "step": 6803 }, { "epoch": 2.8656464972623894, "grad_norm": 0.4395446181297302, "learning_rate": 6.062250707662753e-08, "loss": 0.3899, "step": 6804 }, { "epoch": 2.866067668117366, "grad_norm": 0.41277801990509033, "learning_rate": 6.024259921497077e-08, "loss": 0.422, "step": 6805 }, { "epoch": 2.866488838972343, "grad_norm": 0.4141305983066559, "learning_rate": 5.98638782825134e-08, "loss": 0.3844, "step": 6806 }, { "epoch": 2.86691000982732, "grad_norm": 0.41774943470954895, "learning_rate": 5.9486344370255913e-08, "loss": 0.4843, "step": 6807 }, { "epoch": 2.8673311806822968, "grad_norm": 0.42952513694763184, "learning_rate": 5.910999756891134e-08, "loss": 0.3945, "step": 6808 }, { "epoch": 2.8677523515372734, "grad_norm": 0.42618271708488464, "learning_rate": 5.8734837968909e-08, "loss": 0.4179, "step": 6809 }, { "epoch": 2.8681735223922504, "grad_norm": 0.4110458195209503, "learning_rate": 5.83608656603929e-08, "loss": 0.4093, "step": 6810 }, { "epoch": 2.8685946932472275, "grad_norm": 0.3913625180721283, "learning_rate": 5.79880807332206e-08, "loss": 0.4069, "step": 6811 }, { "epoch": 2.869015864102204, "grad_norm": 0.4355223476886749, "learning_rate": 5.7616483276966006e-08, "loss": 0.4296, "step": 6812 }, { "epoch": 2.8694370349571807, "grad_norm": 0.38940465450286865, "learning_rate": 5.7246073380917144e-08, "loss": 0.3725, "step": 6813 }, { "epoch": 2.869858205812158, "grad_norm": 0.45403072237968445, "learning_rate": 5.687685113407504e-08, "loss": 0.4238, "step": 6814 }, { "epoch": 2.870279376667135, "grad_norm": 0.4730968773365021, "learning_rate": 5.650881662515762e-08, "loss": 0.4248, "step": 6815 }, { "epoch": 2.8707005475221115, "grad_norm": 0.4328927993774414, "learning_rate": 5.6141969942596906e-08, "loss": 0.4122, "step": 6816 }, { "epoch": 2.871121718377088, "grad_norm": 0.3689348101615906, "learning_rate": 5.577631117453797e-08, "loss": 0.4032, "step": 6817 }, { "epoch": 2.871542889232065, "grad_norm": 0.42285141348838806, "learning_rate": 5.5411840408843284e-08, "loss": 0.3749, "step": 6818 }, { "epoch": 2.871964060087042, "grad_norm": 0.42755818367004395, "learning_rate": 5.504855773308671e-08, "loss": 0.4073, "step": 6819 }, { "epoch": 2.872385230942019, "grad_norm": 0.4083682894706726, "learning_rate": 5.468646323455895e-08, "loss": 0.3939, "step": 6820 }, { "epoch": 2.8728064017969954, "grad_norm": 0.4313713610172272, "learning_rate": 5.4325557000263765e-08, "loss": 0.455, "step": 6821 }, { "epoch": 2.8732275726519725, "grad_norm": 0.419451504945755, "learning_rate": 5.396583911692066e-08, "loss": 0.41, "step": 6822 }, { "epoch": 2.8736487435069495, "grad_norm": 0.3876017928123474, "learning_rate": 5.3607309670962724e-08, "loss": 0.4269, "step": 6823 }, { "epoch": 2.874069914361926, "grad_norm": 0.3986714482307434, "learning_rate": 5.324996874853772e-08, "loss": 0.3815, "step": 6824 }, { "epoch": 2.8744910852169028, "grad_norm": 0.4845813810825348, "learning_rate": 5.289381643550751e-08, "loss": 0.4241, "step": 6825 }, { "epoch": 2.87491225607188, "grad_norm": 0.467711478471756, "learning_rate": 5.2538852817449194e-08, "loss": 0.4504, "step": 6826 }, { "epoch": 2.875333426926857, "grad_norm": 0.4139256775379181, "learning_rate": 5.2185077979652884e-08, "loss": 0.3391, "step": 6827 }, { "epoch": 2.8757545977818335, "grad_norm": 0.40109989047050476, "learning_rate": 5.183249200712559e-08, "loss": 0.3665, "step": 6828 }, { "epoch": 2.87617576863681, "grad_norm": 0.42612892389297485, "learning_rate": 5.148109498458509e-08, "loss": 0.4006, "step": 6829 }, { "epoch": 2.876596939491787, "grad_norm": 0.4164819121360779, "learning_rate": 5.1130886996466625e-08, "loss": 0.363, "step": 6830 }, { "epoch": 2.8770181103467642, "grad_norm": 0.44696250557899475, "learning_rate": 5.078186812691788e-08, "loss": 0.4501, "step": 6831 }, { "epoch": 2.877439281201741, "grad_norm": 0.35568472743034363, "learning_rate": 5.0434038459801213e-08, "loss": 0.3765, "step": 6832 }, { "epoch": 2.8778604520567175, "grad_norm": 0.4344234764575958, "learning_rate": 5.0087398078694785e-08, "loss": 0.4102, "step": 6833 }, { "epoch": 2.8782816229116945, "grad_norm": 0.37815025448799133, "learning_rate": 4.974194706688751e-08, "loss": 0.4021, "step": 6834 }, { "epoch": 2.8787027937666716, "grad_norm": 0.46199047565460205, "learning_rate": 4.939768550738688e-08, "loss": 0.4358, "step": 6835 }, { "epoch": 2.879123964621648, "grad_norm": 0.37945622205734253, "learning_rate": 4.905461348291008e-08, "loss": 0.3819, "step": 6836 }, { "epoch": 2.879545135476625, "grad_norm": 0.38550257682800293, "learning_rate": 4.8712731075892807e-08, "loss": 0.3677, "step": 6837 }, { "epoch": 2.879966306331602, "grad_norm": 0.39008787274360657, "learning_rate": 4.8372038368482146e-08, "loss": 0.393, "step": 6838 }, { "epoch": 2.880387477186579, "grad_norm": 0.4204101264476776, "learning_rate": 4.8032535442539277e-08, "loss": 0.3948, "step": 6839 }, { "epoch": 2.8808086480415556, "grad_norm": 0.37967649102211, "learning_rate": 4.769422237964172e-08, "loss": 0.3691, "step": 6840 }, { "epoch": 2.881229818896532, "grad_norm": 0.38812047243118286, "learning_rate": 4.735709926107779e-08, "loss": 0.4281, "step": 6841 }, { "epoch": 2.8816509897515092, "grad_norm": 0.4017334580421448, "learning_rate": 4.702116616785324e-08, "loss": 0.3592, "step": 6842 }, { "epoch": 2.8820721606064863, "grad_norm": 0.42406946420669556, "learning_rate": 4.668642318068628e-08, "loss": 0.4046, "step": 6843 }, { "epoch": 2.882493331461463, "grad_norm": 0.4082072079181671, "learning_rate": 4.635287038000813e-08, "loss": 0.4543, "step": 6844 }, { "epoch": 2.8829145023164395, "grad_norm": 0.4052448272705078, "learning_rate": 4.6020507845966345e-08, "loss": 0.4174, "step": 6845 }, { "epoch": 2.8833356731714166, "grad_norm": 0.4383080303668976, "learning_rate": 4.568933565842037e-08, "loss": 0.3856, "step": 6846 }, { "epoch": 2.8837568440263937, "grad_norm": 0.39163514971733093, "learning_rate": 4.53593538969449e-08, "loss": 0.3866, "step": 6847 }, { "epoch": 2.8841780148813703, "grad_norm": 0.390230655670166, "learning_rate": 4.5030562640829275e-08, "loss": 0.3843, "step": 6848 }, { "epoch": 2.884599185736347, "grad_norm": 0.37676864862442017, "learning_rate": 4.4702961969073644e-08, "loss": 0.3876, "step": 6849 }, { "epoch": 2.885020356591324, "grad_norm": 0.418118417263031, "learning_rate": 4.437655196039559e-08, "loss": 0.3964, "step": 6850 }, { "epoch": 2.8854415274463006, "grad_norm": 0.4812144637107849, "learning_rate": 4.4051332693225146e-08, "loss": 0.4505, "step": 6851 }, { "epoch": 2.8858626983012776, "grad_norm": 0.43446534872055054, "learning_rate": 4.372730424570537e-08, "loss": 0.4739, "step": 6852 }, { "epoch": 2.8862838691562542, "grad_norm": 0.43019866943359375, "learning_rate": 4.3404466695696203e-08, "loss": 0.4328, "step": 6853 }, { "epoch": 2.8867050400112313, "grad_norm": 0.3742181062698364, "learning_rate": 4.30828201207667e-08, "loss": 0.3561, "step": 6854 }, { "epoch": 2.887126210866208, "grad_norm": 0.35724547505378723, "learning_rate": 4.2762364598203373e-08, "loss": 0.4005, "step": 6855 }, { "epoch": 2.887547381721185, "grad_norm": 0.37534284591674805, "learning_rate": 4.244310020500686e-08, "loss": 0.3632, "step": 6856 }, { "epoch": 2.8879685525761616, "grad_norm": 0.41676056385040283, "learning_rate": 4.212502701788801e-08, "loss": 0.4577, "step": 6857 }, { "epoch": 2.8883897234311386, "grad_norm": 0.44007402658462524, "learning_rate": 4.180814511327569e-08, "loss": 0.4122, "step": 6858 }, { "epoch": 2.8888108942861153, "grad_norm": 0.41783806681632996, "learning_rate": 4.1492454567309545e-08, "loss": 0.3892, "step": 6859 }, { "epoch": 2.8892320651410923, "grad_norm": 0.38254988193511963, "learning_rate": 4.117795545584447e-08, "loss": 0.3508, "step": 6860 }, { "epoch": 2.889653235996069, "grad_norm": 0.4197342097759247, "learning_rate": 4.086464785444777e-08, "loss": 0.4562, "step": 6861 }, { "epoch": 2.890074406851046, "grad_norm": 0.4039471745491028, "learning_rate": 4.0552531838402574e-08, "loss": 0.4251, "step": 6862 }, { "epoch": 2.8904955777060226, "grad_norm": 0.40549907088279724, "learning_rate": 4.024160748270278e-08, "loss": 0.387, "step": 6863 }, { "epoch": 2.8909167485609997, "grad_norm": 0.44663378596305847, "learning_rate": 3.9931874862058626e-08, "loss": 0.4037, "step": 6864 }, { "epoch": 2.8913379194159763, "grad_norm": 0.4008742570877075, "learning_rate": 3.96233340508928e-08, "loss": 0.4048, "step": 6865 }, { "epoch": 2.8917590902709533, "grad_norm": 0.43215322494506836, "learning_rate": 3.931598512334212e-08, "loss": 0.4342, "step": 6866 }, { "epoch": 2.89218026112593, "grad_norm": 0.4044431746006012, "learning_rate": 3.900982815325582e-08, "loss": 0.3986, "step": 6867 }, { "epoch": 2.892601431980907, "grad_norm": 0.4099419116973877, "learning_rate": 3.8704863214197865e-08, "loss": 0.3884, "step": 6868 }, { "epoch": 2.8930226028358836, "grad_norm": 0.4209756851196289, "learning_rate": 3.840109037944573e-08, "loss": 0.3739, "step": 6869 }, { "epoch": 2.8934437736908607, "grad_norm": 0.4417235553264618, "learning_rate": 3.8098509721989915e-08, "loss": 0.4718, "step": 6870 }, { "epoch": 2.8938649445458373, "grad_norm": 0.39146560430526733, "learning_rate": 3.77971213145345e-08, "loss": 0.3733, "step": 6871 }, { "epoch": 2.8942861154008144, "grad_norm": 0.3958002030849457, "learning_rate": 3.7496925229498774e-08, "loss": 0.4266, "step": 6872 }, { "epoch": 2.894707286255791, "grad_norm": 0.45684072375297546, "learning_rate": 3.7197921539012246e-08, "loss": 0.4345, "step": 6873 }, { "epoch": 2.895128457110768, "grad_norm": 0.389029860496521, "learning_rate": 3.69001103149208e-08, "loss": 0.321, "step": 6874 }, { "epoch": 2.8955496279657447, "grad_norm": 0.4529678523540497, "learning_rate": 3.6603491628783296e-08, "loss": 0.4256, "step": 6875 }, { "epoch": 2.8959707988207217, "grad_norm": 0.40587013959884644, "learning_rate": 3.6308065551869944e-08, "loss": 0.4706, "step": 6876 }, { "epoch": 2.8963919696756983, "grad_norm": 0.3997332453727722, "learning_rate": 3.60138321551673e-08, "loss": 0.3739, "step": 6877 }, { "epoch": 2.8968131405306754, "grad_norm": 0.43150269985198975, "learning_rate": 3.57207915093738e-08, "loss": 0.4679, "step": 6878 }, { "epoch": 2.897234311385652, "grad_norm": 0.459375262260437, "learning_rate": 3.5428943684900906e-08, "loss": 0.432, "step": 6879 }, { "epoch": 2.897655482240629, "grad_norm": 0.39587000012397766, "learning_rate": 3.513828875187475e-08, "loss": 0.4099, "step": 6880 }, { "epoch": 2.8980766530956057, "grad_norm": 0.41621360182762146, "learning_rate": 3.48488267801339e-08, "loss": 0.3989, "step": 6881 }, { "epoch": 2.8984978239505828, "grad_norm": 0.4213617742061615, "learning_rate": 3.456055783923051e-08, "loss": 0.4235, "step": 6882 }, { "epoch": 2.8989189948055594, "grad_norm": 0.37787070870399475, "learning_rate": 3.4273481998429727e-08, "loss": 0.4089, "step": 6883 }, { "epoch": 2.8993401656605364, "grad_norm": 0.38219255208969116, "learning_rate": 3.3987599326710806e-08, "loss": 0.4082, "step": 6884 }, { "epoch": 2.899761336515513, "grad_norm": 0.4180326461791992, "learning_rate": 3.370290989276603e-08, "loss": 0.4374, "step": 6885 }, { "epoch": 2.90018250737049, "grad_norm": 0.39020493626594543, "learning_rate": 3.3419413765000684e-08, "loss": 0.3715, "step": 6886 }, { "epoch": 2.9006036782254667, "grad_norm": 0.3845578134059906, "learning_rate": 3.3137111011533604e-08, "loss": 0.411, "step": 6887 }, { "epoch": 2.901024849080444, "grad_norm": 0.4116682708263397, "learning_rate": 3.28560017001961e-08, "loss": 0.401, "step": 6888 }, { "epoch": 2.9014460199354204, "grad_norm": 0.40755975246429443, "learning_rate": 3.257608589853356e-08, "loss": 0.443, "step": 6889 }, { "epoch": 2.9018671907903975, "grad_norm": 0.4334484934806824, "learning_rate": 3.229736367380498e-08, "loss": 0.3904, "step": 6890 }, { "epoch": 2.902288361645374, "grad_norm": 0.3801475465297699, "learning_rate": 3.2019835092981786e-08, "loss": 0.3841, "step": 6891 }, { "epoch": 2.902709532500351, "grad_norm": 0.36551231145858765, "learning_rate": 3.174350022274841e-08, "loss": 0.3667, "step": 6892 }, { "epoch": 2.9031307033553277, "grad_norm": 0.43415239453315735, "learning_rate": 3.1468359129502854e-08, "loss": 0.4496, "step": 6893 }, { "epoch": 2.903551874210305, "grad_norm": 0.4257408380508423, "learning_rate": 3.1194411879357237e-08, "loss": 0.3716, "step": 6894 }, { "epoch": 2.9039730450652814, "grad_norm": 0.43495801091194153, "learning_rate": 3.092165853813389e-08, "loss": 0.4343, "step": 6895 }, { "epoch": 2.9043942159202585, "grad_norm": 0.40435290336608887, "learning_rate": 3.065009917137263e-08, "loss": 0.381, "step": 6896 }, { "epoch": 2.904815386775235, "grad_norm": 0.3968032896518707, "learning_rate": 3.03797338443218e-08, "loss": 0.4258, "step": 6897 }, { "epoch": 2.9052365576302117, "grad_norm": 0.39694541692733765, "learning_rate": 3.0110562621946114e-08, "loss": 0.3664, "step": 6898 }, { "epoch": 2.9056577284851888, "grad_norm": 0.4513360857963562, "learning_rate": 2.9842585568922723e-08, "loss": 0.4079, "step": 6899 }, { "epoch": 2.906078899340166, "grad_norm": 0.4090195596218109, "learning_rate": 2.9575802749640116e-08, "loss": 0.392, "step": 6900 }, { "epoch": 2.9065000701951424, "grad_norm": 0.3734285235404968, "learning_rate": 2.9310214228202016e-08, "loss": 0.3565, "step": 6901 }, { "epoch": 2.906921241050119, "grad_norm": 0.4234819710254669, "learning_rate": 2.9045820068424046e-08, "loss": 0.4411, "step": 6902 }, { "epoch": 2.907342411905096, "grad_norm": 0.38180550932884216, "learning_rate": 2.878262033383483e-08, "loss": 0.3833, "step": 6903 }, { "epoch": 2.907763582760073, "grad_norm": 0.4047662019729614, "learning_rate": 2.8520615087676563e-08, "loss": 0.3581, "step": 6904 }, { "epoch": 2.90818475361505, "grad_norm": 0.38618019223213196, "learning_rate": 2.825980439290388e-08, "loss": 0.3764, "step": 6905 }, { "epoch": 2.9086059244700264, "grad_norm": 0.47743937373161316, "learning_rate": 2.8000188312184428e-08, "loss": 0.4869, "step": 6906 }, { "epoch": 2.9090270953250035, "grad_norm": 0.4468318819999695, "learning_rate": 2.774176690789998e-08, "loss": 0.4107, "step": 6907 }, { "epoch": 2.9094482661799805, "grad_norm": 0.4227042496204376, "learning_rate": 2.748454024214309e-08, "loss": 0.4545, "step": 6908 }, { "epoch": 2.909869437034957, "grad_norm": 0.439708948135376, "learning_rate": 2.7228508376720974e-08, "loss": 0.3834, "step": 6909 }, { "epoch": 2.9102906078899338, "grad_norm": 0.39117851853370667, "learning_rate": 2.697367137315221e-08, "loss": 0.3438, "step": 6910 }, { "epoch": 2.910711778744911, "grad_norm": 0.4461962878704071, "learning_rate": 2.6720029292670592e-08, "loss": 0.4686, "step": 6911 }, { "epoch": 2.911132949599888, "grad_norm": 0.40505731105804443, "learning_rate": 2.6467582196221808e-08, "loss": 0.406, "step": 6912 }, { "epoch": 2.9115541204548645, "grad_norm": 0.4115745425224304, "learning_rate": 2.6216330144462343e-08, "loss": 0.392, "step": 6913 }, { "epoch": 2.911975291309841, "grad_norm": 0.3967137932777405, "learning_rate": 2.5966273197763903e-08, "loss": 0.452, "step": 6914 }, { "epoch": 2.912396462164818, "grad_norm": 0.37045082449913025, "learning_rate": 2.57174114162112e-08, "loss": 0.3531, "step": 6915 }, { "epoch": 2.9128176330197952, "grad_norm": 0.40509212017059326, "learning_rate": 2.5469744859599743e-08, "loss": 0.4415, "step": 6916 }, { "epoch": 2.913238803874772, "grad_norm": 0.44091081619262695, "learning_rate": 2.5223273587440256e-08, "loss": 0.4526, "step": 6917 }, { "epoch": 2.9136599747297485, "grad_norm": 0.37565892934799194, "learning_rate": 2.4977997658954257e-08, "loss": 0.3892, "step": 6918 }, { "epoch": 2.9140811455847255, "grad_norm": 0.37764355540275574, "learning_rate": 2.4733917133077378e-08, "loss": 0.3642, "step": 6919 }, { "epoch": 2.9145023164397026, "grad_norm": 0.44898417592048645, "learning_rate": 2.4491032068457154e-08, "loss": 0.398, "step": 6920 }, { "epoch": 2.914923487294679, "grad_norm": 0.4387419521808624, "learning_rate": 2.4249342523453567e-08, "loss": 0.4069, "step": 6921 }, { "epoch": 2.915344658149656, "grad_norm": 0.39123162627220154, "learning_rate": 2.400884855614183e-08, "loss": 0.4405, "step": 6922 }, { "epoch": 2.915765829004633, "grad_norm": 0.34423214197158813, "learning_rate": 2.376955022430627e-08, "loss": 0.3599, "step": 6923 }, { "epoch": 2.91618699985961, "grad_norm": 0.38025999069213867, "learning_rate": 2.3531447585446453e-08, "loss": 0.4159, "step": 6924 }, { "epoch": 2.9166081707145866, "grad_norm": 0.3615713119506836, "learning_rate": 2.329454069677384e-08, "loss": 0.3958, "step": 6925 }, { "epoch": 2.917029341569563, "grad_norm": 0.4722668528556824, "learning_rate": 2.305882961521233e-08, "loss": 0.4704, "step": 6926 }, { "epoch": 2.9174505124245402, "grad_norm": 0.420445054769516, "learning_rate": 2.2824314397399404e-08, "loss": 0.3781, "step": 6927 }, { "epoch": 2.9178716832795173, "grad_norm": 0.43083977699279785, "learning_rate": 2.259099509968443e-08, "loss": 0.4138, "step": 6928 }, { "epoch": 2.918292854134494, "grad_norm": 0.38575053215026855, "learning_rate": 2.2358871778128677e-08, "loss": 0.3701, "step": 6929 }, { "epoch": 2.9187140249894705, "grad_norm": 0.40804779529571533, "learning_rate": 2.212794448850808e-08, "loss": 0.4188, "step": 6930 }, { "epoch": 2.9191351958444476, "grad_norm": 0.4279399812221527, "learning_rate": 2.1898213286309366e-08, "loss": 0.3847, "step": 6931 }, { "epoch": 2.9195563666994246, "grad_norm": 0.4137670695781708, "learning_rate": 2.1669678226732828e-08, "loss": 0.3659, "step": 6932 }, { "epoch": 2.9199775375544013, "grad_norm": 0.4107076823711395, "learning_rate": 2.14423393646912e-08, "loss": 0.41, "step": 6933 }, { "epoch": 2.920398708409378, "grad_norm": 0.39430201053619385, "learning_rate": 2.1216196754809127e-08, "loss": 0.3896, "step": 6934 }, { "epoch": 2.920819879264355, "grad_norm": 0.38690823316574097, "learning_rate": 2.0991250451424806e-08, "loss": 0.3956, "step": 6935 }, { "epoch": 2.921241050119332, "grad_norm": 0.39614614844322205, "learning_rate": 2.076750050858889e-08, "loss": 0.3623, "step": 6936 }, { "epoch": 2.9216622209743086, "grad_norm": 0.4402104616165161, "learning_rate": 2.0544946980063375e-08, "loss": 0.478, "step": 6937 }, { "epoch": 2.9220833918292852, "grad_norm": 0.4596608877182007, "learning_rate": 2.0323589919323816e-08, "loss": 0.3882, "step": 6938 }, { "epoch": 2.9225045626842623, "grad_norm": 0.4264304041862488, "learning_rate": 2.0103429379558226e-08, "loss": 0.3959, "step": 6939 }, { "epoch": 2.9229257335392393, "grad_norm": 0.4352145195007324, "learning_rate": 1.9884465413667066e-08, "loss": 0.4136, "step": 6940 }, { "epoch": 2.923346904394216, "grad_norm": 0.44314515590667725, "learning_rate": 1.9666698074263246e-08, "loss": 0.424, "step": 6941 }, { "epoch": 2.9237680752491926, "grad_norm": 0.45553016662597656, "learning_rate": 1.9450127413672137e-08, "loss": 0.4054, "step": 6942 }, { "epoch": 2.9241892461041696, "grad_norm": 0.40480130910873413, "learning_rate": 1.9234753483931e-08, "loss": 0.4027, "step": 6943 }, { "epoch": 2.9246104169591467, "grad_norm": 0.4243111312389374, "learning_rate": 1.9020576336790665e-08, "loss": 0.4464, "step": 6944 }, { "epoch": 2.9250315878141233, "grad_norm": 0.40893998742103577, "learning_rate": 1.8807596023713292e-08, "loss": 0.3982, "step": 6945 }, { "epoch": 2.9254527586691, "grad_norm": 0.40085333585739136, "learning_rate": 1.8595812595874063e-08, "loss": 0.3964, "step": 6946 }, { "epoch": 2.925873929524077, "grad_norm": 0.437613844871521, "learning_rate": 1.8385226104161158e-08, "loss": 0.4522, "step": 6947 }, { "epoch": 2.9262951003790536, "grad_norm": 0.4167284369468689, "learning_rate": 1.8175836599173545e-08, "loss": 0.3666, "step": 6948 }, { "epoch": 2.9267162712340307, "grad_norm": 0.4459599554538727, "learning_rate": 1.7967644131224314e-08, "loss": 0.435, "step": 6949 }, { "epoch": 2.9271374420890073, "grad_norm": 0.4726656377315521, "learning_rate": 1.776064875033734e-08, "loss": 0.3922, "step": 6950 }, { "epoch": 2.9275586129439843, "grad_norm": 0.4095819890499115, "learning_rate": 1.7554850506250608e-08, "loss": 0.4232, "step": 6951 }, { "epoch": 2.927979783798961, "grad_norm": 1.0854438543319702, "learning_rate": 1.735024944841235e-08, "loss": 0.4066, "step": 6952 }, { "epoch": 2.928400954653938, "grad_norm": 0.4177494943141937, "learning_rate": 1.7146845625985454e-08, "loss": 0.398, "step": 6953 }, { "epoch": 2.9288221255089146, "grad_norm": 0.4676395058631897, "learning_rate": 1.6944639087843052e-08, "loss": 0.4351, "step": 6954 }, { "epoch": 2.9292432963638917, "grad_norm": 0.42228206992149353, "learning_rate": 1.6743629882572387e-08, "loss": 0.3766, "step": 6955 }, { "epoch": 2.9296644672188683, "grad_norm": 0.4319152235984802, "learning_rate": 1.654381805847094e-08, "loss": 0.4101, "step": 6956 }, { "epoch": 2.9300856380738454, "grad_norm": 0.40364181995391846, "learning_rate": 1.6345203663550858e-08, "loss": 0.392, "step": 6957 }, { "epoch": 2.930506808928822, "grad_norm": 0.39109092950820923, "learning_rate": 1.6147786745535078e-08, "loss": 0.3768, "step": 6958 }, { "epoch": 2.930927979783799, "grad_norm": 0.41837480664253235, "learning_rate": 1.5951567351858433e-08, "loss": 0.4152, "step": 6959 }, { "epoch": 2.9313491506387757, "grad_norm": 0.4425264596939087, "learning_rate": 1.575654552966932e-08, "loss": 0.4166, "step": 6960 }, { "epoch": 2.9317703214937527, "grad_norm": 0.4371725022792816, "learning_rate": 1.5562721325826925e-08, "loss": 0.4251, "step": 6961 }, { "epoch": 2.9321914923487293, "grad_norm": 0.3502649664878845, "learning_rate": 1.5370094786905097e-08, "loss": 0.3406, "step": 6962 }, { "epoch": 2.9326126632037064, "grad_norm": 0.39403384923934937, "learning_rate": 1.5178665959186822e-08, "loss": 0.3944, "step": 6963 }, { "epoch": 2.933033834058683, "grad_norm": 0.4570276737213135, "learning_rate": 1.4988434888669746e-08, "loss": 0.4199, "step": 6964 }, { "epoch": 2.93345500491366, "grad_norm": 0.4363197982311249, "learning_rate": 1.4799401621061749e-08, "loss": 0.4168, "step": 6965 }, { "epoch": 2.9338761757686367, "grad_norm": 0.40801534056663513, "learning_rate": 1.4611566201785388e-08, "loss": 0.4266, "step": 6966 }, { "epoch": 2.9342973466236137, "grad_norm": 0.337251752614975, "learning_rate": 1.4424928675972338e-08, "loss": 0.3073, "step": 6967 }, { "epoch": 2.9347185174785904, "grad_norm": 0.4451153874397278, "learning_rate": 1.423948908846895e-08, "loss": 0.4838, "step": 6968 }, { "epoch": 2.9351396883335674, "grad_norm": 0.43770235776901245, "learning_rate": 1.4055247483832356e-08, "loss": 0.4384, "step": 6969 }, { "epoch": 2.935560859188544, "grad_norm": 0.43725699186325073, "learning_rate": 1.3872203906332703e-08, "loss": 0.4049, "step": 6970 }, { "epoch": 2.935982030043521, "grad_norm": 0.4306465983390808, "learning_rate": 1.369035839995203e-08, "loss": 0.4163, "step": 6971 }, { "epoch": 2.9364032008984977, "grad_norm": 0.3799893856048584, "learning_rate": 1.3509711008383163e-08, "loss": 0.3987, "step": 6972 }, { "epoch": 2.9368243717534748, "grad_norm": 0.3940219283103943, "learning_rate": 1.3330261775033605e-08, "loss": 0.4489, "step": 6973 }, { "epoch": 2.9372455426084514, "grad_norm": 0.39631327986717224, "learning_rate": 1.3152010743020526e-08, "loss": 0.4076, "step": 6974 }, { "epoch": 2.9376667134634284, "grad_norm": 0.4024074077606201, "learning_rate": 1.2974957955175227e-08, "loss": 0.4121, "step": 6975 }, { "epoch": 2.938087884318405, "grad_norm": 0.4266752600669861, "learning_rate": 1.279910345403923e-08, "loss": 0.4478, "step": 6976 }, { "epoch": 2.938509055173382, "grad_norm": 0.38082563877105713, "learning_rate": 1.2624447281867625e-08, "loss": 0.397, "step": 6977 }, { "epoch": 2.9389302260283587, "grad_norm": 0.3726789355278015, "learning_rate": 1.2450989480626841e-08, "loss": 0.3921, "step": 6978 }, { "epoch": 2.939351396883336, "grad_norm": 0.4127156734466553, "learning_rate": 1.227873009199465e-08, "loss": 0.4341, "step": 6979 }, { "epoch": 2.9397725677383124, "grad_norm": 0.41908201575279236, "learning_rate": 1.2107669157362944e-08, "loss": 0.4024, "step": 6980 }, { "epoch": 2.9401937385932895, "grad_norm": 0.40508872270584106, "learning_rate": 1.1937806717833844e-08, "loss": 0.3659, "step": 6981 }, { "epoch": 2.940614909448266, "grad_norm": 0.4489046037197113, "learning_rate": 1.1769142814221367e-08, "loss": 0.4251, "step": 6982 }, { "epoch": 2.941036080303243, "grad_norm": 0.38611358404159546, "learning_rate": 1.1601677487053652e-08, "loss": 0.3802, "step": 6983 }, { "epoch": 2.9414572511582198, "grad_norm": 0.4073733389377594, "learning_rate": 1.1435410776567957e-08, "loss": 0.3869, "step": 6984 }, { "epoch": 2.941878422013197, "grad_norm": 0.39981570839881897, "learning_rate": 1.127034272271621e-08, "loss": 0.3634, "step": 6985 }, { "epoch": 2.9422995928681734, "grad_norm": 0.4141933023929596, "learning_rate": 1.110647336516002e-08, "loss": 0.4396, "step": 6986 }, { "epoch": 2.9427207637231505, "grad_norm": 0.4098515510559082, "learning_rate": 1.0943802743275112e-08, "loss": 0.4191, "step": 6987 }, { "epoch": 2.943141934578127, "grad_norm": 0.3923689126968384, "learning_rate": 1.0782330896146887e-08, "loss": 0.3648, "step": 6988 }, { "epoch": 2.943563105433104, "grad_norm": 0.42976614832878113, "learning_rate": 1.0622057862575419e-08, "loss": 0.4062, "step": 6989 }, { "epoch": 2.943984276288081, "grad_norm": 0.4325104355812073, "learning_rate": 1.0462983681069904e-08, "loss": 0.3812, "step": 6990 }, { "epoch": 2.944405447143058, "grad_norm": 0.485756516456604, "learning_rate": 1.0305108389853657e-08, "loss": 0.4147, "step": 6991 }, { "epoch": 2.9448266179980345, "grad_norm": 0.4183306396007538, "learning_rate": 1.0148432026860778e-08, "loss": 0.3601, "step": 6992 }, { "epoch": 2.9452477888530115, "grad_norm": 0.3874088525772095, "learning_rate": 9.992954629737817e-09, "loss": 0.4102, "step": 6993 }, { "epoch": 2.945668959707988, "grad_norm": 0.3559817671775818, "learning_rate": 9.83867623584267e-09, "loss": 0.351, "step": 6994 }, { "epoch": 2.946090130562965, "grad_norm": 0.4365696907043457, "learning_rate": 9.68559688224624e-09, "loss": 0.4494, "step": 6995 }, { "epoch": 2.946511301417942, "grad_norm": 0.38010862469673157, "learning_rate": 9.533716605729104e-09, "loss": 0.4032, "step": 6996 }, { "epoch": 2.946932472272919, "grad_norm": 0.42134490609169006, "learning_rate": 9.383035442786514e-09, "loss": 0.4147, "step": 6997 }, { "epoch": 2.9473536431278955, "grad_norm": 0.42287927865982056, "learning_rate": 9.233553429623953e-09, "loss": 0.4154, "step": 6998 }, { "epoch": 2.947774813982872, "grad_norm": 0.4199519157409668, "learning_rate": 9.085270602158803e-09, "loss": 0.3956, "step": 6999 }, { "epoch": 2.948195984837849, "grad_norm": 0.43745067715644836, "learning_rate": 8.938186996020893e-09, "loss": 0.4256, "step": 7000 }, { "epoch": 2.9486171556928262, "grad_norm": 0.3483726978302002, "learning_rate": 8.792302646551398e-09, "loss": 0.3575, "step": 7001 }, { "epoch": 2.949038326547803, "grad_norm": 0.4055536389350891, "learning_rate": 8.647617588803948e-09, "loss": 0.4212, "step": 7002 }, { "epoch": 2.9494594974027795, "grad_norm": 0.41058897972106934, "learning_rate": 8.504131857542952e-09, "loss": 0.4097, "step": 7003 }, { "epoch": 2.9498806682577565, "grad_norm": 0.41818317770957947, "learning_rate": 8.36184548724528e-09, "loss": 0.3769, "step": 7004 }, { "epoch": 2.9503018391127336, "grad_norm": 0.3917641341686249, "learning_rate": 8.220758512100247e-09, "loss": 0.3812, "step": 7005 }, { "epoch": 2.95072300996771, "grad_norm": 0.38784125447273254, "learning_rate": 8.080870966008513e-09, "loss": 0.4035, "step": 7006 }, { "epoch": 2.951144180822687, "grad_norm": 0.3700891137123108, "learning_rate": 7.942182882580974e-09, "loss": 0.3899, "step": 7007 }, { "epoch": 2.951565351677664, "grad_norm": 0.4051223397254944, "learning_rate": 7.804694295143189e-09, "loss": 0.4161, "step": 7008 }, { "epoch": 2.951986522532641, "grad_norm": 0.3971080183982849, "learning_rate": 7.668405236730402e-09, "loss": 0.4014, "step": 7009 }, { "epoch": 2.9524076933876175, "grad_norm": 0.4088856279850006, "learning_rate": 7.533315740090308e-09, "loss": 0.4097, "step": 7010 }, { "epoch": 2.952828864242594, "grad_norm": 0.44056808948516846, "learning_rate": 7.39942583768305e-09, "loss": 0.4341, "step": 7011 }, { "epoch": 2.9532500350975712, "grad_norm": 0.45772475004196167, "learning_rate": 7.266735561678451e-09, "loss": 0.4364, "step": 7012 }, { "epoch": 2.9536712059525483, "grad_norm": 0.40434256196022034, "learning_rate": 7.135244943959896e-09, "loss": 0.3861, "step": 7013 }, { "epoch": 2.954092376807525, "grad_norm": 0.4208710193634033, "learning_rate": 7.0049540161232224e-09, "loss": 0.4063, "step": 7014 }, { "epoch": 2.9545135476625015, "grad_norm": 0.43669813871383667, "learning_rate": 6.8758628094733885e-09, "loss": 0.4299, "step": 7015 }, { "epoch": 2.9549347185174786, "grad_norm": 0.38273322582244873, "learning_rate": 6.747971355028915e-09, "loss": 0.4017, "step": 7016 }, { "epoch": 2.9553558893724556, "grad_norm": 0.39093419909477234, "learning_rate": 6.6212796835202205e-09, "loss": 0.3385, "step": 7017 }, { "epoch": 2.9557770602274323, "grad_norm": 0.4251418709754944, "learning_rate": 6.495787825388511e-09, "loss": 0.4176, "step": 7018 }, { "epoch": 2.956198231082409, "grad_norm": 0.39571428298950195, "learning_rate": 6.371495810786887e-09, "loss": 0.3424, "step": 7019 }, { "epoch": 2.956619401937386, "grad_norm": 0.40605419874191284, "learning_rate": 6.24840366958035e-09, "loss": 0.4264, "step": 7020 }, { "epoch": 2.957040572792363, "grad_norm": 0.4219038486480713, "learning_rate": 6.126511431346349e-09, "loss": 0.4033, "step": 7021 }, { "epoch": 2.9574617436473396, "grad_norm": 0.4487576484680176, "learning_rate": 6.0058191253720145e-09, "loss": 0.4505, "step": 7022 }, { "epoch": 2.957882914502316, "grad_norm": 0.4690798819065094, "learning_rate": 5.886326780658591e-09, "loss": 0.4098, "step": 7023 }, { "epoch": 2.9583040853572933, "grad_norm": 0.332492858171463, "learning_rate": 5.7680344259170014e-09, "loss": 0.4012, "step": 7024 }, { "epoch": 2.9587252562122703, "grad_norm": 0.37830013036727905, "learning_rate": 5.650942089571177e-09, "loss": 0.4398, "step": 7025 }, { "epoch": 2.959146427067247, "grad_norm": 0.4020935595035553, "learning_rate": 5.535049799755832e-09, "loss": 0.397, "step": 7026 }, { "epoch": 2.9595675979222236, "grad_norm": 0.39844441413879395, "learning_rate": 5.420357584317582e-09, "loss": 0.3934, "step": 7027 }, { "epoch": 2.9599887687772006, "grad_norm": 0.397081583738327, "learning_rate": 5.306865470814937e-09, "loss": 0.3506, "step": 7028 }, { "epoch": 2.9604099396321777, "grad_norm": 0.4006982445716858, "learning_rate": 5.194573486517751e-09, "loss": 0.4082, "step": 7029 }, { "epoch": 2.9608311104871543, "grad_norm": 0.4145205318927765, "learning_rate": 5.083481658408329e-09, "loss": 0.4463, "step": 7030 }, { "epoch": 2.961252281342131, "grad_norm": 0.42385226488113403, "learning_rate": 4.973590013178653e-09, "loss": 0.3664, "step": 7031 }, { "epoch": 2.961673452197108, "grad_norm": 0.4432499408721924, "learning_rate": 4.864898577234822e-09, "loss": 0.4444, "step": 7032 }, { "epoch": 2.962094623052085, "grad_norm": 0.39783623814582825, "learning_rate": 4.757407376692613e-09, "loss": 0.3884, "step": 7033 }, { "epoch": 2.9625157939070617, "grad_norm": 0.37862351536750793, "learning_rate": 4.651116437379699e-09, "loss": 0.3638, "step": 7034 }, { "epoch": 2.9629369647620383, "grad_norm": 0.4679699242115021, "learning_rate": 4.546025784837316e-09, "loss": 0.5002, "step": 7035 }, { "epoch": 2.9633581356170153, "grad_norm": 0.4169761836528778, "learning_rate": 4.442135444315265e-09, "loss": 0.3744, "step": 7036 }, { "epoch": 2.9637793064719924, "grad_norm": 0.4490903615951538, "learning_rate": 4.339445440776358e-09, "loss": 0.4599, "step": 7037 }, { "epoch": 2.964200477326969, "grad_norm": 0.4109959006309509, "learning_rate": 4.237955798896409e-09, "loss": 0.332, "step": 7038 }, { "epoch": 2.9646216481819456, "grad_norm": 0.50787353515625, "learning_rate": 4.137666543059804e-09, "loss": 0.4988, "step": 7039 }, { "epoch": 2.9650428190369227, "grad_norm": 0.35178160667419434, "learning_rate": 4.038577697365598e-09, "loss": 0.387, "step": 7040 }, { "epoch": 2.9654639898918997, "grad_norm": 0.4045146703720093, "learning_rate": 3.940689285621968e-09, "loss": 0.4078, "step": 7041 }, { "epoch": 2.9658851607468764, "grad_norm": 0.3958394527435303, "learning_rate": 3.844001331349545e-09, "loss": 0.4094, "step": 7042 }, { "epoch": 2.966306331601853, "grad_norm": 0.4816572368144989, "learning_rate": 3.74851385778141e-09, "loss": 0.435, "step": 7043 }, { "epoch": 2.96672750245683, "grad_norm": 0.4494752585887909, "learning_rate": 3.6542268878608788e-09, "loss": 0.4076, "step": 7044 }, { "epoch": 2.967148673311807, "grad_norm": 0.35810592770576477, "learning_rate": 3.561140444243716e-09, "loss": 0.3618, "step": 7045 }, { "epoch": 2.9675698441667837, "grad_norm": 0.44778865575790405, "learning_rate": 3.4692545492959196e-09, "loss": 0.4273, "step": 7046 }, { "epoch": 2.9679910150217603, "grad_norm": 0.3883369266986847, "learning_rate": 3.378569225096495e-09, "loss": 0.3812, "step": 7047 }, { "epoch": 2.9684121858767374, "grad_norm": 0.43845611810684204, "learning_rate": 3.28908449343579e-09, "loss": 0.382, "step": 7048 }, { "epoch": 2.968833356731714, "grad_norm": 0.41372939944267273, "learning_rate": 3.2008003758143834e-09, "loss": 0.446, "step": 7049 }, { "epoch": 2.969254527586691, "grad_norm": 0.40010714530944824, "learning_rate": 3.113716893445862e-09, "loss": 0.3935, "step": 7050 }, { "epoch": 2.9696756984416677, "grad_norm": 0.4101422131061554, "learning_rate": 3.027834067254598e-09, "loss": 0.4008, "step": 7051 }, { "epoch": 2.9700968692966447, "grad_norm": 0.40382567048072815, "learning_rate": 2.943151917876863e-09, "loss": 0.3982, "step": 7052 }, { "epoch": 2.9705180401516214, "grad_norm": 0.4444213807582855, "learning_rate": 2.859670465659159e-09, "loss": 0.3759, "step": 7053 }, { "epoch": 2.9709392110065984, "grad_norm": 0.37022197246551514, "learning_rate": 2.7773897306615504e-09, "loss": 0.3924, "step": 7054 }, { "epoch": 2.971360381861575, "grad_norm": 0.4614658057689667, "learning_rate": 2.696309732654334e-09, "loss": 0.4532, "step": 7055 }, { "epoch": 2.971781552716552, "grad_norm": 0.44919824600219727, "learning_rate": 2.6164304911191486e-09, "loss": 0.4254, "step": 7056 }, { "epoch": 2.9722027235715287, "grad_norm": 0.39290615916252136, "learning_rate": 2.537752025249529e-09, "loss": 0.3786, "step": 7057 }, { "epoch": 2.9726238944265058, "grad_norm": 0.5035229325294495, "learning_rate": 2.4602743539503536e-09, "loss": 0.4354, "step": 7058 }, { "epoch": 2.9730450652814824, "grad_norm": 0.4021434783935547, "learning_rate": 2.383997495838397e-09, "loss": 0.4017, "step": 7059 }, { "epoch": 2.9734662361364594, "grad_norm": 0.41452088952064514, "learning_rate": 2.308921469241221e-09, "loss": 0.4343, "step": 7060 }, { "epoch": 2.973887406991436, "grad_norm": 0.4131024479866028, "learning_rate": 2.235046292198284e-09, "loss": 0.3726, "step": 7061 }, { "epoch": 2.974308577846413, "grad_norm": 0.43620559573173523, "learning_rate": 2.162371982459832e-09, "loss": 0.3998, "step": 7062 }, { "epoch": 2.9747297487013897, "grad_norm": 0.4089302718639374, "learning_rate": 2.090898557489118e-09, "loss": 0.4204, "step": 7063 }, { "epoch": 2.975150919556367, "grad_norm": 0.4076550006866455, "learning_rate": 2.0206260344590724e-09, "loss": 0.4147, "step": 7064 }, { "epoch": 2.9755720904113434, "grad_norm": 0.47676998376846313, "learning_rate": 1.951554430255076e-09, "loss": 0.4229, "step": 7065 }, { "epoch": 2.9759932612663205, "grad_norm": 0.4307117164134979, "learning_rate": 1.8836837614738535e-09, "loss": 0.3873, "step": 7066 }, { "epoch": 2.976414432121297, "grad_norm": 0.41900932788848877, "learning_rate": 1.8170140444234707e-09, "loss": 0.3945, "step": 7067 }, { "epoch": 2.976835602976274, "grad_norm": 0.37417763471603394, "learning_rate": 1.7515452951227807e-09, "loss": 0.3891, "step": 7068 }, { "epoch": 2.9772567738312508, "grad_norm": 0.4227985739707947, "learning_rate": 1.6872775293030885e-09, "loss": 0.3807, "step": 7069 }, { "epoch": 2.977677944686228, "grad_norm": 0.4395178258419037, "learning_rate": 1.6242107624070414e-09, "loss": 0.4238, "step": 7070 }, { "epoch": 2.9780991155412044, "grad_norm": 0.44713300466537476, "learning_rate": 1.5623450095880731e-09, "loss": 0.4144, "step": 7071 }, { "epoch": 2.9785202863961815, "grad_norm": 0.48250678181648254, "learning_rate": 1.5016802857109603e-09, "loss": 0.4293, "step": 7072 }, { "epoch": 2.978941457251158, "grad_norm": 0.4010922908782959, "learning_rate": 1.4422166053529308e-09, "loss": 0.3826, "step": 7073 }, { "epoch": 2.979362628106135, "grad_norm": 0.45303279161453247, "learning_rate": 1.3839539828014447e-09, "loss": 0.4444, "step": 7074 }, { "epoch": 2.979783798961112, "grad_norm": 0.4065573215484619, "learning_rate": 1.3268924320564147e-09, "loss": 0.3956, "step": 7075 }, { "epoch": 2.980204969816089, "grad_norm": 0.4096183180809021, "learning_rate": 1.2710319668279846e-09, "loss": 0.4105, "step": 7076 }, { "epoch": 2.9806261406710655, "grad_norm": 0.43066325783729553, "learning_rate": 1.216372600538751e-09, "loss": 0.4178, "step": 7077 }, { "epoch": 2.9810473115260425, "grad_norm": 0.42617830634117126, "learning_rate": 1.1629143463226522e-09, "loss": 0.4268, "step": 7078 }, { "epoch": 2.981468482381019, "grad_norm": 0.4181024432182312, "learning_rate": 1.1106572170238583e-09, "loss": 0.4196, "step": 7079 }, { "epoch": 2.981889653235996, "grad_norm": 0.3642377555370331, "learning_rate": 1.059601225199547e-09, "loss": 0.3943, "step": 7080 }, { "epoch": 2.982310824090973, "grad_norm": 0.4100293517112732, "learning_rate": 1.0097463831171273e-09, "loss": 0.4285, "step": 7081 }, { "epoch": 2.98273199494595, "grad_norm": 0.36675721406936646, "learning_rate": 9.610927027559057e-10, "loss": 0.3579, "step": 7082 }, { "epoch": 2.9831531658009265, "grad_norm": 0.4022180736064911, "learning_rate": 9.136401958059759e-10, "loss": 0.3932, "step": 7083 }, { "epoch": 2.9835743366559035, "grad_norm": 0.4102228879928589, "learning_rate": 8.673888736698832e-10, "loss": 0.3775, "step": 7084 }, { "epoch": 2.98399550751088, "grad_norm": 0.387114554643631, "learning_rate": 8.22338747460405e-10, "loss": 0.4035, "step": 7085 }, { "epoch": 2.9844166783658572, "grad_norm": 0.4050769507884979, "learning_rate": 7.784898280027708e-10, "loss": 0.3669, "step": 7086 }, { "epoch": 2.984837849220834, "grad_norm": 0.46247297525405884, "learning_rate": 7.358421258324422e-10, "loss": 0.4487, "step": 7087 }, { "epoch": 2.985259020075811, "grad_norm": 0.37364599108695984, "learning_rate": 6.943956511973326e-10, "loss": 0.3868, "step": 7088 }, { "epoch": 2.9856801909307875, "grad_norm": 0.40111684799194336, "learning_rate": 6.541504140561427e-10, "loss": 0.3713, "step": 7089 }, { "epoch": 2.9861013617857646, "grad_norm": 0.3757031261920929, "learning_rate": 6.151064240789151e-10, "loss": 0.368, "step": 7090 }, { "epoch": 2.986522532640741, "grad_norm": 0.4172065556049347, "learning_rate": 5.772636906470341e-10, "loss": 0.4501, "step": 7091 }, { "epoch": 2.9869437034957182, "grad_norm": 0.4118277430534363, "learning_rate": 5.406222228537816e-10, "loss": 0.3554, "step": 7092 }, { "epoch": 2.987364874350695, "grad_norm": 0.4151762127876282, "learning_rate": 5.051820295032262e-10, "loss": 0.4284, "step": 7093 }, { "epoch": 2.987786045205672, "grad_norm": 0.3758140802383423, "learning_rate": 4.709431191107783e-10, "loss": 0.3608, "step": 7094 }, { "epoch": 2.9882072160606485, "grad_norm": 0.3701837956905365, "learning_rate": 4.3790549990319066e-10, "loss": 0.4107, "step": 7095 }, { "epoch": 2.9886283869156256, "grad_norm": 0.3607836663722992, "learning_rate": 4.060691798196681e-10, "loss": 0.3815, "step": 7096 }, { "epoch": 2.989049557770602, "grad_norm": 0.4033247232437134, "learning_rate": 3.7543416650909214e-10, "loss": 0.4354, "step": 7097 }, { "epoch": 2.9894707286255793, "grad_norm": 0.45079588890075684, "learning_rate": 3.4600046733279656e-10, "loss": 0.4037, "step": 7098 }, { "epoch": 2.989891899480556, "grad_norm": 0.447381854057312, "learning_rate": 3.1776808936290203e-10, "loss": 0.4424, "step": 7099 }, { "epoch": 2.9903130703355325, "grad_norm": 0.3542710840702057, "learning_rate": 2.9073703938342636e-10, "loss": 0.3478, "step": 7100 }, { "epoch": 2.9907342411905096, "grad_norm": 0.46871036291122437, "learning_rate": 2.649073238891742e-10, "loss": 0.4711, "step": 7101 }, { "epoch": 2.9911554120454866, "grad_norm": 0.3951120972633362, "learning_rate": 2.402789490862922e-10, "loss": 0.3307, "step": 7102 }, { "epoch": 2.9915765829004632, "grad_norm": 0.4521670937538147, "learning_rate": 2.1685192089282438e-10, "loss": 0.503, "step": 7103 }, { "epoch": 2.99199775375544, "grad_norm": 0.4101623594760895, "learning_rate": 1.946262449381564e-10, "loss": 0.387, "step": 7104 }, { "epoch": 2.992418924610417, "grad_norm": 0.3732460141181946, "learning_rate": 1.7360192656246112e-10, "loss": 0.3972, "step": 7105 }, { "epoch": 2.992840095465394, "grad_norm": 0.40946245193481445, "learning_rate": 1.5377897081669814e-10, "loss": 0.3962, "step": 7106 }, { "epoch": 2.9932612663203706, "grad_norm": 0.5471957325935364, "learning_rate": 1.3515738246538957e-10, "loss": 0.4553, "step": 7107 }, { "epoch": 2.993682437175347, "grad_norm": 0.37710729241371155, "learning_rate": 1.1773716598162398e-10, "loss": 0.4418, "step": 7108 }, { "epoch": 2.9941036080303243, "grad_norm": 0.38726770877838135, "learning_rate": 1.0151832555205243e-10, "loss": 0.4292, "step": 7109 }, { "epoch": 2.9945247788853013, "grad_norm": 0.3622537851333618, "learning_rate": 8.650086507300259e-11, "loss": 0.3598, "step": 7110 }, { "epoch": 2.994945949740278, "grad_norm": 0.4252617061138153, "learning_rate": 7.268478815380953e-11, "loss": 0.4498, "step": 7111 }, { "epoch": 2.9953671205952546, "grad_norm": 0.4388391673564911, "learning_rate": 6.007009811348496e-11, "loss": 0.423, "step": 7112 }, { "epoch": 2.9957882914502316, "grad_norm": 0.41917309165000916, "learning_rate": 4.865679798293776e-11, "loss": 0.4056, "step": 7113 }, { "epoch": 2.9962094623052087, "grad_norm": 0.3900809586048126, "learning_rate": 3.844489050497391e-11, "loss": 0.396, "step": 7114 }, { "epoch": 2.9966306331601853, "grad_norm": 0.4138427674770355, "learning_rate": 2.9434378133741394e-11, "loss": 0.426, "step": 7115 }, { "epoch": 2.997051804015162, "grad_norm": 0.39416998624801636, "learning_rate": 2.1625263033620003e-11, "loss": 0.3787, "step": 7116 }, { "epoch": 2.997472974870139, "grad_norm": 0.42594122886657715, "learning_rate": 1.501754708088665e-11, "loss": 0.3753, "step": 7117 }, { "epoch": 2.997894145725116, "grad_norm": 0.4194762408733368, "learning_rate": 9.611231863715375e-12, "loss": 0.4352, "step": 7118 }, { "epoch": 2.9983153165800926, "grad_norm": 0.4165741801261902, "learning_rate": 5.406318681067113e-12, "loss": 0.3651, "step": 7119 }, { "epoch": 2.9987364874350693, "grad_norm": 0.4018743932247162, "learning_rate": 2.402808543244817e-12, "loss": 0.4504, "step": 7120 }, { "epoch": 2.9991576582900463, "grad_norm": 0.40170201659202576, "learning_rate": 6.007021718934525e-13, "loss": 0.4187, "step": 7121 }, { "epoch": 2.9995788291450234, "grad_norm": 0.42323440313339233, "learning_rate": 0.0, "loss": 0.4045, "step": 7122 }, { "epoch": 2.9995788291450234, "step": 7122, "total_flos": 5242880291733504.0, "train_loss": 0.453794626281088, "train_runtime": 124970.6924, "train_samples_per_second": 5.471, "train_steps_per_second": 0.057 } ], "logging_steps": 1.0, "max_steps": 7122, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5242880291733504.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }