| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.36469730123997085, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00036469730123997083, | |
| "grad_norm": 0.2268046736717224, | |
| "learning_rate": 4e-05, | |
| "loss": 0.6811, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0007293946024799417, | |
| "grad_norm": 0.2326797991991043, | |
| "learning_rate": 8e-05, | |
| "loss": 0.5105, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0010940919037199124, | |
| "grad_norm": 0.22285917401313782, | |
| "learning_rate": 0.00012, | |
| "loss": 0.5868, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0014587892049598833, | |
| "grad_norm": 0.19026874005794525, | |
| "learning_rate": 0.00016, | |
| "loss": 0.4699, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0018234865061998542, | |
| "grad_norm": 0.2471790909767151, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6535, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.002188183807439825, | |
| "grad_norm": 0.24426604807376862, | |
| "learning_rate": 0.00019992692729265622, | |
| "loss": 0.5943, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.002552881108679796, | |
| "grad_norm": 0.22199125587940216, | |
| "learning_rate": 0.0001998538545853124, | |
| "loss": 0.5346, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0029175784099197666, | |
| "grad_norm": 0.28295931220054626, | |
| "learning_rate": 0.00019978078187796858, | |
| "loss": 0.5446, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0032822757111597373, | |
| "grad_norm": 0.28008294105529785, | |
| "learning_rate": 0.0001997077091706248, | |
| "loss": 0.714, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0036469730123997084, | |
| "grad_norm": 0.23789669573307037, | |
| "learning_rate": 0.00019963463646328097, | |
| "loss": 0.505, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0040116703136396795, | |
| "grad_norm": 0.31461969017982483, | |
| "learning_rate": 0.00019956156375593718, | |
| "loss": 0.6611, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.00437636761487965, | |
| "grad_norm": 0.31836387515068054, | |
| "learning_rate": 0.00019948849104859336, | |
| "loss": 0.5954, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.004741064916119621, | |
| "grad_norm": 0.24425436556339264, | |
| "learning_rate": 0.00019941541834124954, | |
| "loss": 0.4776, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.005105762217359592, | |
| "grad_norm": 0.2626420259475708, | |
| "learning_rate": 0.00019934234563390575, | |
| "loss": 0.451, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.005470459518599562, | |
| "grad_norm": 0.3038848042488098, | |
| "learning_rate": 0.00019926927292656196, | |
| "loss": 0.7207, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.005835156819839533, | |
| "grad_norm": 0.25323376059532166, | |
| "learning_rate": 0.0001991962002192181, | |
| "loss": 0.4251, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.006199854121079504, | |
| "grad_norm": 0.3116416931152344, | |
| "learning_rate": 0.00019912312751187432, | |
| "loss": 0.724, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.006564551422319475, | |
| "grad_norm": 0.28092512488365173, | |
| "learning_rate": 0.00019905005480453053, | |
| "loss": 0.6591, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.006929248723559446, | |
| "grad_norm": 0.3688206076622009, | |
| "learning_rate": 0.0001989769820971867, | |
| "loss": 0.8551, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.007293946024799417, | |
| "grad_norm": 0.2335461676120758, | |
| "learning_rate": 0.00019890390938984292, | |
| "loss": 0.3711, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.007658643326039387, | |
| "grad_norm": 0.28437790274620056, | |
| "learning_rate": 0.0001988308366824991, | |
| "loss": 0.4625, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.008023340627279359, | |
| "grad_norm": 0.31511029601097107, | |
| "learning_rate": 0.00019875776397515528, | |
| "loss": 0.6102, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.008388037928519328, | |
| "grad_norm": 0.2957281768321991, | |
| "learning_rate": 0.0001986846912678115, | |
| "loss": 0.6009, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.0087527352297593, | |
| "grad_norm": 0.26605701446533203, | |
| "learning_rate": 0.00019861161856046767, | |
| "loss": 0.4492, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.00911743253099927, | |
| "grad_norm": 0.2744329273700714, | |
| "learning_rate": 0.00019853854585312385, | |
| "loss": 0.4166, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.009482129832239242, | |
| "grad_norm": 0.3793030083179474, | |
| "learning_rate": 0.00019846547314578006, | |
| "loss": 0.7185, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.009846827133479213, | |
| "grad_norm": 0.3891250789165497, | |
| "learning_rate": 0.00019839240043843624, | |
| "loss": 0.9231, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.010211524434719184, | |
| "grad_norm": 0.4095641076564789, | |
| "learning_rate": 0.00019831932773109245, | |
| "loss": 0.706, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.010576221735959153, | |
| "grad_norm": 0.3578352928161621, | |
| "learning_rate": 0.00019824625502374866, | |
| "loss": 0.5935, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.010940919037199124, | |
| "grad_norm": 0.3827204406261444, | |
| "learning_rate": 0.0001981731823164048, | |
| "loss": 0.8403, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.011305616338439095, | |
| "grad_norm": 0.3178398609161377, | |
| "learning_rate": 0.00019810010960906102, | |
| "loss": 0.588, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.011670313639679067, | |
| "grad_norm": 0.3158668279647827, | |
| "learning_rate": 0.00019802703690171723, | |
| "loss": 0.5753, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.012035010940919038, | |
| "grad_norm": 0.26300448179244995, | |
| "learning_rate": 0.0001979539641943734, | |
| "loss": 0.573, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.012399708242159009, | |
| "grad_norm": 0.2760365605354309, | |
| "learning_rate": 0.0001978808914870296, | |
| "loss": 0.5887, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.012764405543398978, | |
| "grad_norm": 0.34060901403427124, | |
| "learning_rate": 0.0001978078187796858, | |
| "loss": 0.7347, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.01312910284463895, | |
| "grad_norm": 0.2601701319217682, | |
| "learning_rate": 0.00019773474607234198, | |
| "loss": 0.5684, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.01349380014587892, | |
| "grad_norm": 0.33629322052001953, | |
| "learning_rate": 0.0001976616733649982, | |
| "loss": 0.8124, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.013858497447118891, | |
| "grad_norm": 0.36825031042099, | |
| "learning_rate": 0.00019758860065765437, | |
| "loss": 0.8472, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.014223194748358862, | |
| "grad_norm": 0.28288817405700684, | |
| "learning_rate": 0.00019751552795031055, | |
| "loss": 0.76, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.014587892049598834, | |
| "grad_norm": 0.29537615180015564, | |
| "learning_rate": 0.00019744245524296676, | |
| "loss": 0.7321, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.014952589350838803, | |
| "grad_norm": 0.34148740768432617, | |
| "learning_rate": 0.00019736938253562297, | |
| "loss": 0.8342, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.015317286652078774, | |
| "grad_norm": 0.292447566986084, | |
| "learning_rate": 0.00019729630982827915, | |
| "loss": 0.6134, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.015681983953318747, | |
| "grad_norm": 0.2850889265537262, | |
| "learning_rate": 0.00019722323712093533, | |
| "loss": 0.6998, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.016046681254558718, | |
| "grad_norm": 0.3336108326911926, | |
| "learning_rate": 0.00019715016441359154, | |
| "loss": 0.8365, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.016411378555798686, | |
| "grad_norm": 0.34880322217941284, | |
| "learning_rate": 0.00019707709170624772, | |
| "loss": 0.8137, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.016776075857038657, | |
| "grad_norm": 0.27575618028640747, | |
| "learning_rate": 0.00019700401899890393, | |
| "loss": 0.7041, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.017140773158278628, | |
| "grad_norm": 0.32200825214385986, | |
| "learning_rate": 0.0001969309462915601, | |
| "loss": 0.9865, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.0175054704595186, | |
| "grad_norm": 0.31082266569137573, | |
| "learning_rate": 0.0001968578735842163, | |
| "loss": 0.7872, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.01787016776075857, | |
| "grad_norm": 0.3037458658218384, | |
| "learning_rate": 0.0001967848008768725, | |
| "loss": 1.005, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.01823486506199854, | |
| "grad_norm": 0.22701998054981232, | |
| "learning_rate": 0.0001967117281695287, | |
| "loss": 0.4318, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.018599562363238512, | |
| "grad_norm": 0.27476680278778076, | |
| "learning_rate": 0.00019663865546218486, | |
| "loss": 0.6872, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.018964259664478483, | |
| "grad_norm": 0.2562110424041748, | |
| "learning_rate": 0.00019656558275484107, | |
| "loss": 0.6356, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.019328956965718454, | |
| "grad_norm": 0.2805593013763428, | |
| "learning_rate": 0.00019649251004749728, | |
| "loss": 0.8285, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.019693654266958426, | |
| "grad_norm": 0.32811108231544495, | |
| "learning_rate": 0.00019641943734015346, | |
| "loss": 0.8711, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.020058351568198397, | |
| "grad_norm": 0.24847714602947235, | |
| "learning_rate": 0.00019634636463280967, | |
| "loss": 0.6721, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.020423048869438368, | |
| "grad_norm": 0.2888585031032562, | |
| "learning_rate": 0.00019627329192546585, | |
| "loss": 0.786, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.020787746170678335, | |
| "grad_norm": 0.2474086582660675, | |
| "learning_rate": 0.00019620021921812203, | |
| "loss": 0.6735, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.021152443471918306, | |
| "grad_norm": 0.28172221779823303, | |
| "learning_rate": 0.00019612714651077824, | |
| "loss": 0.7776, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.021517140773158278, | |
| "grad_norm": 0.3260613977909088, | |
| "learning_rate": 0.00019605407380343442, | |
| "loss": 0.8941, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.02188183807439825, | |
| "grad_norm": 0.2589282989501953, | |
| "learning_rate": 0.0001959810010960906, | |
| "loss": 0.6758, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02224653537563822, | |
| "grad_norm": 0.2978575825691223, | |
| "learning_rate": 0.0001959079283887468, | |
| "loss": 0.7084, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.02261123267687819, | |
| "grad_norm": 0.2522169351577759, | |
| "learning_rate": 0.000195834855681403, | |
| "loss": 0.6634, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.022975929978118162, | |
| "grad_norm": 0.3184927999973297, | |
| "learning_rate": 0.0001957617829740592, | |
| "loss": 0.9409, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.023340627279358133, | |
| "grad_norm": 0.24009554088115692, | |
| "learning_rate": 0.00019568871026671538, | |
| "loss": 0.6867, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.023705324580598104, | |
| "grad_norm": 0.2735375761985779, | |
| "learning_rate": 0.00019561563755937157, | |
| "loss": 0.6533, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.024070021881838075, | |
| "grad_norm": 0.2777388095855713, | |
| "learning_rate": 0.00019554256485202777, | |
| "loss": 0.7517, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.024434719183078046, | |
| "grad_norm": 0.271108478307724, | |
| "learning_rate": 0.00019546949214468398, | |
| "loss": 0.8724, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.024799416484318017, | |
| "grad_norm": 0.22648799419403076, | |
| "learning_rate": 0.00019539641943734016, | |
| "loss": 0.523, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.02516411378555799, | |
| "grad_norm": 0.2377820461988449, | |
| "learning_rate": 0.00019532334672999634, | |
| "loss": 0.5099, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.025528811086797956, | |
| "grad_norm": 0.22092792391777039, | |
| "learning_rate": 0.00019525027402265255, | |
| "loss": 0.5447, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.025893508388037927, | |
| "grad_norm": 0.26954007148742676, | |
| "learning_rate": 0.00019517720131530873, | |
| "loss": 0.6141, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0262582056892779, | |
| "grad_norm": 0.2562531530857086, | |
| "learning_rate": 0.00019510412860796494, | |
| "loss": 0.7991, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.02662290299051787, | |
| "grad_norm": 0.28438082337379456, | |
| "learning_rate": 0.00019503105590062112, | |
| "loss": 0.9198, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.02698760029175784, | |
| "grad_norm": 0.1985676884651184, | |
| "learning_rate": 0.0001949579831932773, | |
| "loss": 0.4564, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.02735229759299781, | |
| "grad_norm": 0.23653608560562134, | |
| "learning_rate": 0.00019488491048593351, | |
| "loss": 0.6741, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.027716994894237783, | |
| "grad_norm": 0.18751463294029236, | |
| "learning_rate": 0.00019481183777858972, | |
| "loss": 0.378, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.028081692195477754, | |
| "grad_norm": 0.2654147148132324, | |
| "learning_rate": 0.0001947387650712459, | |
| "loss": 0.6615, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.028446389496717725, | |
| "grad_norm": 0.2540780007839203, | |
| "learning_rate": 0.00019466569236390208, | |
| "loss": 0.661, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.028811086797957696, | |
| "grad_norm": 0.2665940821170807, | |
| "learning_rate": 0.0001945926196565583, | |
| "loss": 0.7608, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.029175784099197667, | |
| "grad_norm": 0.27822214365005493, | |
| "learning_rate": 0.00019451954694921447, | |
| "loss": 0.9142, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02954048140043764, | |
| "grad_norm": 0.26205846667289734, | |
| "learning_rate": 0.00019444647424187068, | |
| "loss": 0.7416, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.029905178701677606, | |
| "grad_norm": 0.2633398175239563, | |
| "learning_rate": 0.00019437340153452686, | |
| "loss": 0.6727, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.030269876002917577, | |
| "grad_norm": 0.2297828495502472, | |
| "learning_rate": 0.00019430032882718305, | |
| "loss": 0.5744, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.030634573304157548, | |
| "grad_norm": 0.27555879950523376, | |
| "learning_rate": 0.00019422725611983925, | |
| "loss": 0.7682, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.03099927060539752, | |
| "grad_norm": 0.2495211362838745, | |
| "learning_rate": 0.00019415418341249546, | |
| "loss": 0.7603, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.031363967906637494, | |
| "grad_norm": 0.24564798176288605, | |
| "learning_rate": 0.00019408111070515162, | |
| "loss": 0.6745, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.03172866520787746, | |
| "grad_norm": 0.2123216986656189, | |
| "learning_rate": 0.00019400803799780783, | |
| "loss": 0.5624, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.032093362509117436, | |
| "grad_norm": 0.26791101694107056, | |
| "learning_rate": 0.00019393496529046403, | |
| "loss": 0.8236, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.032458059810357404, | |
| "grad_norm": 0.24294224381446838, | |
| "learning_rate": 0.00019386189258312021, | |
| "loss": 0.7493, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.03282275711159737, | |
| "grad_norm": 0.25676801800727844, | |
| "learning_rate": 0.00019378881987577642, | |
| "loss": 0.8281, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.033187454412837346, | |
| "grad_norm": 0.22630107402801514, | |
| "learning_rate": 0.0001937157471684326, | |
| "loss": 0.6335, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.03355215171407731, | |
| "grad_norm": 0.25208771228790283, | |
| "learning_rate": 0.00019364267446108879, | |
| "loss": 0.7753, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.03391684901531729, | |
| "grad_norm": 0.23288992047309875, | |
| "learning_rate": 0.000193569601753745, | |
| "loss": 0.683, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.034281546316557256, | |
| "grad_norm": 0.2340337187051773, | |
| "learning_rate": 0.00019349652904640118, | |
| "loss": 0.7111, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.03464624361779723, | |
| "grad_norm": 0.24910978972911835, | |
| "learning_rate": 0.00019342345633905736, | |
| "loss": 0.8183, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.0350109409190372, | |
| "grad_norm": 0.23724719882011414, | |
| "learning_rate": 0.00019335038363171357, | |
| "loss": 0.7408, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.03537563822027717, | |
| "grad_norm": 0.2057395875453949, | |
| "learning_rate": 0.00019327731092436975, | |
| "loss": 0.5099, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.03574033552151714, | |
| "grad_norm": 0.22540345788002014, | |
| "learning_rate": 0.00019320423821702595, | |
| "loss": 0.7391, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.036105032822757115, | |
| "grad_norm": 0.2615845799446106, | |
| "learning_rate": 0.00019313116550968214, | |
| "loss": 0.7005, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.03646973012399708, | |
| "grad_norm": 0.19165730476379395, | |
| "learning_rate": 0.00019305809280233832, | |
| "loss": 0.4355, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03683442742523705, | |
| "grad_norm": 0.22737336158752441, | |
| "learning_rate": 0.00019298502009499453, | |
| "loss": 0.7579, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.037199124726477024, | |
| "grad_norm": 0.21350006759166718, | |
| "learning_rate": 0.00019291194738765073, | |
| "loss": 0.6608, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.03756382202771699, | |
| "grad_norm": 0.24027219414710999, | |
| "learning_rate": 0.00019283887468030692, | |
| "loss": 0.8235, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.03792851932895697, | |
| "grad_norm": 0.21983422338962555, | |
| "learning_rate": 0.0001927658019729631, | |
| "loss": 0.5319, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.038293216630196934, | |
| "grad_norm": 0.26618310809135437, | |
| "learning_rate": 0.0001926927292656193, | |
| "loss": 0.9259, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.03865791393143691, | |
| "grad_norm": 0.24729526042938232, | |
| "learning_rate": 0.0001926196565582755, | |
| "loss": 0.7653, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.039022611232676876, | |
| "grad_norm": 0.2595866024494171, | |
| "learning_rate": 0.0001925465838509317, | |
| "loss": 0.7662, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.03938730853391685, | |
| "grad_norm": 0.16923396289348602, | |
| "learning_rate": 0.00019247351114358788, | |
| "loss": 0.3119, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.03975200583515682, | |
| "grad_norm": 0.2592317759990692, | |
| "learning_rate": 0.00019240043843624406, | |
| "loss": 0.6767, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.04011670313639679, | |
| "grad_norm": 0.2139894813299179, | |
| "learning_rate": 0.00019232736572890027, | |
| "loss": 0.5058, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04048140043763676, | |
| "grad_norm": 0.2439870685338974, | |
| "learning_rate": 0.00019225429302155647, | |
| "loss": 0.807, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.040846097738876735, | |
| "grad_norm": 0.26212504506111145, | |
| "learning_rate": 0.00019218122031421263, | |
| "loss": 0.9942, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.0412107950401167, | |
| "grad_norm": 0.26018884778022766, | |
| "learning_rate": 0.00019210814760686884, | |
| "loss": 0.6573, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.04157549234135667, | |
| "grad_norm": 0.20158423483371735, | |
| "learning_rate": 0.00019203507489952505, | |
| "loss": 0.4533, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.041940189642596645, | |
| "grad_norm": 0.2270892709493637, | |
| "learning_rate": 0.00019196200219218123, | |
| "loss": 0.5983, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.04230488694383661, | |
| "grad_norm": 0.2140335738658905, | |
| "learning_rate": 0.00019188892948483744, | |
| "loss": 0.5686, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.04266958424507659, | |
| "grad_norm": 0.25761061906814575, | |
| "learning_rate": 0.00019181585677749362, | |
| "loss": 0.7238, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.043034281546316555, | |
| "grad_norm": 0.24370697140693665, | |
| "learning_rate": 0.0001917427840701498, | |
| "loss": 0.7342, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.04339897884755653, | |
| "grad_norm": 0.2632579207420349, | |
| "learning_rate": 0.000191669711362806, | |
| "loss": 0.8978, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.0437636761487965, | |
| "grad_norm": 0.22956986725330353, | |
| "learning_rate": 0.00019159663865546221, | |
| "loss": 0.6409, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04412837345003647, | |
| "grad_norm": 0.24121011793613434, | |
| "learning_rate": 0.00019152356594811837, | |
| "loss": 0.7556, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.04449307075127644, | |
| "grad_norm": 0.2375144511461258, | |
| "learning_rate": 0.00019145049324077458, | |
| "loss": 0.7303, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.044857768052516414, | |
| "grad_norm": 0.22452694177627563, | |
| "learning_rate": 0.00019137742053343079, | |
| "loss": 0.6456, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.04522246535375638, | |
| "grad_norm": 0.2590137720108032, | |
| "learning_rate": 0.00019130434782608697, | |
| "loss": 0.7438, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.045587162654996356, | |
| "grad_norm": 0.2955920696258545, | |
| "learning_rate": 0.00019123127511874318, | |
| "loss": 1.0269, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.045951859956236324, | |
| "grad_norm": 0.21322080492973328, | |
| "learning_rate": 0.00019115820241139936, | |
| "loss": 0.6143, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.04631655725747629, | |
| "grad_norm": 0.20497886836528778, | |
| "learning_rate": 0.00019108512970405554, | |
| "loss": 0.5871, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.046681254558716266, | |
| "grad_norm": 0.26355209946632385, | |
| "learning_rate": 0.00019101205699671175, | |
| "loss": 0.8077, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.047045951859956234, | |
| "grad_norm": 0.21869653463363647, | |
| "learning_rate": 0.00019093898428936793, | |
| "loss": 0.6736, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.04741064916119621, | |
| "grad_norm": 0.21741290390491486, | |
| "learning_rate": 0.0001908659115820241, | |
| "loss": 0.597, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.047775346462436176, | |
| "grad_norm": 0.2503755986690521, | |
| "learning_rate": 0.00019079283887468032, | |
| "loss": 0.8038, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.04814004376367615, | |
| "grad_norm": 0.2087285965681076, | |
| "learning_rate": 0.0001907197661673365, | |
| "loss": 0.5409, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.04850474106491612, | |
| "grad_norm": 0.2347562611103058, | |
| "learning_rate": 0.0001906466934599927, | |
| "loss": 0.6741, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.04886943836615609, | |
| "grad_norm": 0.23479056358337402, | |
| "learning_rate": 0.0001905736207526489, | |
| "loss": 0.7257, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.04923413566739606, | |
| "grad_norm": 0.2217235416173935, | |
| "learning_rate": 0.00019050054804530507, | |
| "loss": 0.6539, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.049598832968636035, | |
| "grad_norm": 0.23342272639274597, | |
| "learning_rate": 0.00019042747533796128, | |
| "loss": 0.6716, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.049963530269876, | |
| "grad_norm": 0.231741800904274, | |
| "learning_rate": 0.00019035440263061749, | |
| "loss": 0.6563, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.05032822757111598, | |
| "grad_norm": 0.2353263646364212, | |
| "learning_rate": 0.00019028132992327367, | |
| "loss": 0.7612, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.050692924872355945, | |
| "grad_norm": 0.21605569124221802, | |
| "learning_rate": 0.00019020825721592985, | |
| "loss": 0.6007, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.05105762217359591, | |
| "grad_norm": 0.25579389929771423, | |
| "learning_rate": 0.00019013518450858606, | |
| "loss": 0.8586, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.05142231947483589, | |
| "grad_norm": 0.24197165668010712, | |
| "learning_rate": 0.00019006211180124224, | |
| "loss": 0.7442, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.051787016776075855, | |
| "grad_norm": 0.21442811191082, | |
| "learning_rate": 0.00018998903909389845, | |
| "loss": 0.6356, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.05215171407731583, | |
| "grad_norm": 0.23524148762226105, | |
| "learning_rate": 0.00018991596638655463, | |
| "loss": 0.8438, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.0525164113785558, | |
| "grad_norm": 0.1977744698524475, | |
| "learning_rate": 0.0001898428936792108, | |
| "loss": 0.6249, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.05288110867979577, | |
| "grad_norm": 0.22285562753677368, | |
| "learning_rate": 0.00018976982097186702, | |
| "loss": 0.6603, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.05324580598103574, | |
| "grad_norm": 0.223031684756279, | |
| "learning_rate": 0.00018969674826452323, | |
| "loss": 0.7324, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.053610503282275714, | |
| "grad_norm": 0.200596883893013, | |
| "learning_rate": 0.00018962367555717938, | |
| "loss": 0.546, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.05397520058351568, | |
| "grad_norm": 0.23952823877334595, | |
| "learning_rate": 0.0001895506028498356, | |
| "loss": 0.909, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.054339897884755656, | |
| "grad_norm": 0.15994083881378174, | |
| "learning_rate": 0.0001894775301424918, | |
| "loss": 0.3073, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.05470459518599562, | |
| "grad_norm": 0.2209351807832718, | |
| "learning_rate": 0.00018940445743514798, | |
| "loss": 0.6757, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05506929248723559, | |
| "grad_norm": 0.19749605655670166, | |
| "learning_rate": 0.0001893313847278042, | |
| "loss": 0.5071, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.055433989788475566, | |
| "grad_norm": 0.23921646177768707, | |
| "learning_rate": 0.00018925831202046037, | |
| "loss": 0.7567, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.05579868708971553, | |
| "grad_norm": 0.21612580120563507, | |
| "learning_rate": 0.00018918523931311655, | |
| "loss": 0.6329, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.05616338439095551, | |
| "grad_norm": 0.21304431557655334, | |
| "learning_rate": 0.00018911216660577276, | |
| "loss": 0.6612, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.056528081692195475, | |
| "grad_norm": 0.23509572446346283, | |
| "learning_rate": 0.00018903909389842897, | |
| "loss": 0.6723, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.05689277899343545, | |
| "grad_norm": 0.23066239058971405, | |
| "learning_rate": 0.00018896602119108512, | |
| "loss": 0.7588, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.05725747629467542, | |
| "grad_norm": 0.2865854799747467, | |
| "learning_rate": 0.00018889294848374133, | |
| "loss": 1.0649, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.05762217359591539, | |
| "grad_norm": 0.21136869490146637, | |
| "learning_rate": 0.00018881987577639754, | |
| "loss": 0.6532, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.05798687089715536, | |
| "grad_norm": 0.2841559648513794, | |
| "learning_rate": 0.00018874680306905372, | |
| "loss": 1.047, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.058351568198395334, | |
| "grad_norm": 0.23411215841770172, | |
| "learning_rate": 0.0001886737303617099, | |
| "loss": 0.6954, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0587162654996353, | |
| "grad_norm": 0.25536251068115234, | |
| "learning_rate": 0.0001886006576543661, | |
| "loss": 0.8371, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.05908096280087528, | |
| "grad_norm": 0.24174682796001434, | |
| "learning_rate": 0.0001885275849470223, | |
| "loss": 0.6944, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.059445660102115244, | |
| "grad_norm": 0.21550963819026947, | |
| "learning_rate": 0.0001884545122396785, | |
| "loss": 0.6978, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.05981035740335521, | |
| "grad_norm": 0.1993122398853302, | |
| "learning_rate": 0.00018838143953233468, | |
| "loss": 0.4768, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.060175054704595186, | |
| "grad_norm": 0.22367194294929504, | |
| "learning_rate": 0.00018830836682499086, | |
| "loss": 0.6238, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.060539752005835154, | |
| "grad_norm": 0.20643705129623413, | |
| "learning_rate": 0.00018823529411764707, | |
| "loss": 0.598, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.06090444930707513, | |
| "grad_norm": 0.21671368181705475, | |
| "learning_rate": 0.00018816222141030325, | |
| "loss": 0.7071, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.061269146608315096, | |
| "grad_norm": 0.19023250043392181, | |
| "learning_rate": 0.00018808914870295946, | |
| "loss": 0.4923, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.06163384390955507, | |
| "grad_norm": 0.26334115862846375, | |
| "learning_rate": 0.00018801607599561564, | |
| "loss": 0.8478, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.06199854121079504, | |
| "grad_norm": 0.2204032838344574, | |
| "learning_rate": 0.00018794300328827182, | |
| "loss": 0.6593, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.06236323851203501, | |
| "grad_norm": 0.20991367101669312, | |
| "learning_rate": 0.00018786993058092803, | |
| "loss": 0.6712, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.06272793581327499, | |
| "grad_norm": 0.2253231555223465, | |
| "learning_rate": 0.00018779685787358424, | |
| "loss": 0.6589, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.06309263311451495, | |
| "grad_norm": 0.1936040222644806, | |
| "learning_rate": 0.00018772378516624042, | |
| "loss": 0.515, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.06345733041575492, | |
| "grad_norm": 0.24025693535804749, | |
| "learning_rate": 0.0001876507124588966, | |
| "loss": 0.8256, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.0638220277169949, | |
| "grad_norm": 0.2465592622756958, | |
| "learning_rate": 0.0001875776397515528, | |
| "loss": 0.9898, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.06418672501823487, | |
| "grad_norm": 0.20442670583724976, | |
| "learning_rate": 0.000187504567044209, | |
| "loss": 0.6724, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.06455142231947483, | |
| "grad_norm": 0.19670630991458893, | |
| "learning_rate": 0.0001874314943368652, | |
| "loss": 0.6449, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.06491611962071481, | |
| "grad_norm": 0.1966039091348648, | |
| "learning_rate": 0.00018735842162952138, | |
| "loss": 0.787, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.06528081692195478, | |
| "grad_norm": 0.21741807460784912, | |
| "learning_rate": 0.00018728534892217756, | |
| "loss": 0.8073, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.06564551422319474, | |
| "grad_norm": 0.20191621780395508, | |
| "learning_rate": 0.00018721227621483377, | |
| "loss": 0.6297, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.06601021152443472, | |
| "grad_norm": 0.21392996609210968, | |
| "learning_rate": 0.00018713920350748998, | |
| "loss": 0.6611, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.06637490882567469, | |
| "grad_norm": 0.21892090141773224, | |
| "learning_rate": 0.00018706613080014613, | |
| "loss": 0.7913, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.06673960612691467, | |
| "grad_norm": 0.240064337849617, | |
| "learning_rate": 0.00018699305809280234, | |
| "loss": 0.74, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.06710430342815463, | |
| "grad_norm": 0.2156010866165161, | |
| "learning_rate": 0.00018691998538545855, | |
| "loss": 0.6928, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.0674690007293946, | |
| "grad_norm": 0.19305868446826935, | |
| "learning_rate": 0.00018684691267811473, | |
| "loss": 0.6712, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.06783369803063458, | |
| "grad_norm": 0.24134770035743713, | |
| "learning_rate": 0.00018677383997077094, | |
| "loss": 0.7784, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.06819839533187455, | |
| "grad_norm": 0.21951039135456085, | |
| "learning_rate": 0.00018670076726342712, | |
| "loss": 0.7536, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.06856309263311451, | |
| "grad_norm": 0.22668084502220154, | |
| "learning_rate": 0.0001866276945560833, | |
| "loss": 0.7914, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.06892778993435449, | |
| "grad_norm": 0.23703083395957947, | |
| "learning_rate": 0.0001865546218487395, | |
| "loss": 0.8562, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.06929248723559446, | |
| "grad_norm": 0.18408019840717316, | |
| "learning_rate": 0.00018648154914139572, | |
| "loss": 0.5408, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.06965718453683442, | |
| "grad_norm": 0.168971985578537, | |
| "learning_rate": 0.00018640847643405187, | |
| "loss": 0.5145, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.0700218818380744, | |
| "grad_norm": 0.2313617318868637, | |
| "learning_rate": 0.00018633540372670808, | |
| "loss": 0.7116, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.07038657913931437, | |
| "grad_norm": 0.22079357504844666, | |
| "learning_rate": 0.0001862623310193643, | |
| "loss": 0.7453, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.07075127644055434, | |
| "grad_norm": 0.20398060977458954, | |
| "learning_rate": 0.00018618925831202047, | |
| "loss": 0.6914, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.0711159737417943, | |
| "grad_norm": 0.21486474573612213, | |
| "learning_rate": 0.00018611618560467665, | |
| "loss": 0.7252, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.07148067104303428, | |
| "grad_norm": 0.21152055263519287, | |
| "learning_rate": 0.00018604311289733286, | |
| "loss": 0.676, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.07184536834427425, | |
| "grad_norm": 0.2242351919412613, | |
| "learning_rate": 0.00018597004018998904, | |
| "loss": 0.8428, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.07221006564551423, | |
| "grad_norm": 0.19831842184066772, | |
| "learning_rate": 0.00018589696748264525, | |
| "loss": 0.6379, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.07257476294675419, | |
| "grad_norm": 0.2057400494813919, | |
| "learning_rate": 0.00018582389477530143, | |
| "loss": 0.7417, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.07293946024799416, | |
| "grad_norm": 0.17779530584812164, | |
| "learning_rate": 0.0001857508220679576, | |
| "loss": 0.6891, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07330415754923414, | |
| "grad_norm": 0.20354579389095306, | |
| "learning_rate": 0.00018567774936061382, | |
| "loss": 0.7052, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.0736688548504741, | |
| "grad_norm": 0.17558813095092773, | |
| "learning_rate": 0.00018560467665327, | |
| "loss": 0.6295, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.07403355215171407, | |
| "grad_norm": 0.1900980919599533, | |
| "learning_rate": 0.0001855316039459262, | |
| "loss": 0.6312, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.07439824945295405, | |
| "grad_norm": 0.2224852293729782, | |
| "learning_rate": 0.0001854585312385824, | |
| "loss": 0.7633, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.07476294675419402, | |
| "grad_norm": 0.2300332933664322, | |
| "learning_rate": 0.00018538545853123857, | |
| "loss": 0.8564, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.07512764405543398, | |
| "grad_norm": 0.24671219289302826, | |
| "learning_rate": 0.00018531238582389478, | |
| "loss": 0.9927, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.07549234135667396, | |
| "grad_norm": 0.1760961413383484, | |
| "learning_rate": 0.000185239313116551, | |
| "loss": 0.5943, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.07585703865791393, | |
| "grad_norm": 0.25457900762557983, | |
| "learning_rate": 0.00018516624040920717, | |
| "loss": 1.0929, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.07622173595915391, | |
| "grad_norm": 0.1756330132484436, | |
| "learning_rate": 0.00018509316770186335, | |
| "loss": 0.5191, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.07658643326039387, | |
| "grad_norm": 0.22464942932128906, | |
| "learning_rate": 0.00018502009499451956, | |
| "loss": 0.9786, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.07695113056163384, | |
| "grad_norm": 0.17986498773097992, | |
| "learning_rate": 0.00018494702228717574, | |
| "loss": 0.5692, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.07731582786287382, | |
| "grad_norm": 0.20685848593711853, | |
| "learning_rate": 0.00018487394957983195, | |
| "loss": 0.7599, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.07768052516411379, | |
| "grad_norm": 0.19135037064552307, | |
| "learning_rate": 0.00018480087687248813, | |
| "loss": 0.5762, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.07804522246535375, | |
| "grad_norm": 0.18071472644805908, | |
| "learning_rate": 0.00018472780416514431, | |
| "loss": 0.4915, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.07840991976659373, | |
| "grad_norm": 0.2219233512878418, | |
| "learning_rate": 0.00018465473145780052, | |
| "loss": 0.9144, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.0787746170678337, | |
| "grad_norm": 0.20180021226406097, | |
| "learning_rate": 0.00018458165875045673, | |
| "loss": 0.753, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.07913931436907366, | |
| "grad_norm": 0.18585549294948578, | |
| "learning_rate": 0.00018450858604311289, | |
| "loss": 0.6177, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.07950401167031364, | |
| "grad_norm": 0.21946971118450165, | |
| "learning_rate": 0.0001844355133357691, | |
| "loss": 0.8311, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.07986870897155361, | |
| "grad_norm": 0.22317932546138763, | |
| "learning_rate": 0.0001843624406284253, | |
| "loss": 0.831, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.08023340627279359, | |
| "grad_norm": 0.20973660051822662, | |
| "learning_rate": 0.00018428936792108148, | |
| "loss": 0.718, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.08059810357403355, | |
| "grad_norm": 0.19049975275993347, | |
| "learning_rate": 0.0001842162952137377, | |
| "loss": 0.6143, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.08096280087527352, | |
| "grad_norm": 0.22633568942546844, | |
| "learning_rate": 0.00018414322250639387, | |
| "loss": 0.7479, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.0813274981765135, | |
| "grad_norm": 0.22375738620758057, | |
| "learning_rate": 0.00018407014979905005, | |
| "loss": 0.783, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.08169219547775347, | |
| "grad_norm": 0.20708216726779938, | |
| "learning_rate": 0.00018399707709170626, | |
| "loss": 0.7068, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.08205689277899343, | |
| "grad_norm": 0.2057557851076126, | |
| "learning_rate": 0.00018392400438436247, | |
| "loss": 0.7246, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.0824215900802334, | |
| "grad_norm": 0.21535396575927734, | |
| "learning_rate": 0.00018385093167701863, | |
| "loss": 0.6725, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.08278628738147338, | |
| "grad_norm": 0.17876707017421722, | |
| "learning_rate": 0.00018377785896967483, | |
| "loss": 0.5595, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.08315098468271334, | |
| "grad_norm": 0.17822523415088654, | |
| "learning_rate": 0.00018370478626233104, | |
| "loss": 0.5312, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.08351568198395332, | |
| "grad_norm": 0.24496309459209442, | |
| "learning_rate": 0.00018363171355498722, | |
| "loss": 1.0328, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.08388037928519329, | |
| "grad_norm": 0.2105628103017807, | |
| "learning_rate": 0.0001835586408476434, | |
| "loss": 0.6597, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.08424507658643327, | |
| "grad_norm": 0.1954609751701355, | |
| "learning_rate": 0.0001834855681402996, | |
| "loss": 0.7691, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.08460977388767323, | |
| "grad_norm": 0.18443147838115692, | |
| "learning_rate": 0.0001834124954329558, | |
| "loss": 0.634, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.0849744711889132, | |
| "grad_norm": 0.20477977395057678, | |
| "learning_rate": 0.000183339422725612, | |
| "loss": 0.6758, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.08533916849015317, | |
| "grad_norm": 0.21266460418701172, | |
| "learning_rate": 0.00018326635001826818, | |
| "loss": 0.9446, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.08570386579139315, | |
| "grad_norm": 0.2169213891029358, | |
| "learning_rate": 0.00018319327731092437, | |
| "loss": 0.755, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.08606856309263311, | |
| "grad_norm": 0.19145172834396362, | |
| "learning_rate": 0.00018312020460358057, | |
| "loss": 0.7846, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.08643326039387308, | |
| "grad_norm": 0.1787746399641037, | |
| "learning_rate": 0.00018304713189623676, | |
| "loss": 0.6359, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.08679795769511306, | |
| "grad_norm": 0.21680796146392822, | |
| "learning_rate": 0.00018297405918889296, | |
| "loss": 0.6408, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.08716265499635302, | |
| "grad_norm": 0.20149710774421692, | |
| "learning_rate": 0.00018290098648154914, | |
| "loss": 0.695, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.087527352297593, | |
| "grad_norm": 0.20793262124061584, | |
| "learning_rate": 0.00018282791377420533, | |
| "loss": 0.7159, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.08789204959883297, | |
| "grad_norm": 0.21236906945705414, | |
| "learning_rate": 0.00018275484106686153, | |
| "loss": 0.8924, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.08825674690007294, | |
| "grad_norm": 0.17754322290420532, | |
| "learning_rate": 0.00018268176835951774, | |
| "loss": 0.5931, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.0886214442013129, | |
| "grad_norm": 0.21361956000328064, | |
| "learning_rate": 0.00018260869565217392, | |
| "loss": 0.852, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.08898614150255288, | |
| "grad_norm": 0.19755081832408905, | |
| "learning_rate": 0.0001825356229448301, | |
| "loss": 0.7846, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.08935083880379285, | |
| "grad_norm": 0.17052070796489716, | |
| "learning_rate": 0.00018246255023748631, | |
| "loss": 0.4757, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.08971553610503283, | |
| "grad_norm": 0.17633679509162903, | |
| "learning_rate": 0.0001823894775301425, | |
| "loss": 0.3075, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.09008023340627279, | |
| "grad_norm": 0.18541620671749115, | |
| "learning_rate": 0.0001823164048227987, | |
| "loss": 0.6268, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.09044493070751276, | |
| "grad_norm": 0.18999099731445312, | |
| "learning_rate": 0.00018224333211545488, | |
| "loss": 0.6236, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.09080962800875274, | |
| "grad_norm": 0.17926354706287384, | |
| "learning_rate": 0.00018217025940811107, | |
| "loss": 0.6153, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.09117432530999271, | |
| "grad_norm": 0.161162868142128, | |
| "learning_rate": 0.00018209718670076727, | |
| "loss": 0.436, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.09153902261123267, | |
| "grad_norm": 0.17199409008026123, | |
| "learning_rate": 0.00018202411399342348, | |
| "loss": 0.5008, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.09190371991247265, | |
| "grad_norm": 0.20446273684501648, | |
| "learning_rate": 0.00018195104128607964, | |
| "loss": 0.8099, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.09226841721371262, | |
| "grad_norm": 0.22668351233005524, | |
| "learning_rate": 0.00018187796857873585, | |
| "loss": 0.8039, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.09263311451495258, | |
| "grad_norm": 0.20580340921878815, | |
| "learning_rate": 0.00018180489587139205, | |
| "loss": 0.8551, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.09299781181619256, | |
| "grad_norm": 0.20698761940002441, | |
| "learning_rate": 0.00018173182316404824, | |
| "loss": 0.7958, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.09336250911743253, | |
| "grad_norm": 0.18675172328948975, | |
| "learning_rate": 0.00018165875045670444, | |
| "loss": 0.6952, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.0937272064186725, | |
| "grad_norm": 0.21192388236522675, | |
| "learning_rate": 0.00018158567774936063, | |
| "loss": 0.906, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.09409190371991247, | |
| "grad_norm": 0.20454420149326324, | |
| "learning_rate": 0.0001815126050420168, | |
| "loss": 0.8052, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.09445660102115244, | |
| "grad_norm": 0.2223392277956009, | |
| "learning_rate": 0.00018143953233467301, | |
| "loss": 1.0307, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.09482129832239242, | |
| "grad_norm": 0.1847476363182068, | |
| "learning_rate": 0.00018136645962732922, | |
| "loss": 0.7384, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.09518599562363239, | |
| "grad_norm": 0.1858607977628708, | |
| "learning_rate": 0.00018129338691998538, | |
| "loss": 0.6157, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.09555069292487235, | |
| "grad_norm": 0.19958609342575073, | |
| "learning_rate": 0.00018122031421264159, | |
| "loss": 0.7196, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.09591539022611233, | |
| "grad_norm": 0.22875526547431946, | |
| "learning_rate": 0.0001811472415052978, | |
| "loss": 0.864, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.0962800875273523, | |
| "grad_norm": 0.15967297554016113, | |
| "learning_rate": 0.00018107416879795398, | |
| "loss": 0.5185, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.09664478482859226, | |
| "grad_norm": 0.15776892006397247, | |
| "learning_rate": 0.00018100109609061016, | |
| "loss": 0.5136, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.09700948212983224, | |
| "grad_norm": 0.17968744039535522, | |
| "learning_rate": 0.00018092802338326637, | |
| "loss": 0.6538, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.09737417943107221, | |
| "grad_norm": 0.19834838807582855, | |
| "learning_rate": 0.00018085495067592255, | |
| "loss": 0.7448, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.09773887673231219, | |
| "grad_norm": 0.18585754930973053, | |
| "learning_rate": 0.00018078187796857875, | |
| "loss": 0.6166, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.09810357403355215, | |
| "grad_norm": 0.21760611236095428, | |
| "learning_rate": 0.00018070880526123494, | |
| "loss": 0.9431, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.09846827133479212, | |
| "grad_norm": 0.17871254682540894, | |
| "learning_rate": 0.00018063573255389112, | |
| "loss": 0.5939, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.0988329686360321, | |
| "grad_norm": 0.16932818293571472, | |
| "learning_rate": 0.00018056265984654733, | |
| "loss": 0.5615, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.09919766593727207, | |
| "grad_norm": 0.18163429200649261, | |
| "learning_rate": 0.0001804895871392035, | |
| "loss": 0.6572, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.09956236323851203, | |
| "grad_norm": 0.17625723779201508, | |
| "learning_rate": 0.00018041651443185972, | |
| "loss": 0.5748, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.099927060539752, | |
| "grad_norm": 0.19214142858982086, | |
| "learning_rate": 0.0001803434417245159, | |
| "loss": 0.7589, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.10029175784099198, | |
| "grad_norm": 0.19191130995750427, | |
| "learning_rate": 0.00018027036901717208, | |
| "loss": 0.7201, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.10065645514223195, | |
| "grad_norm": 0.18246126174926758, | |
| "learning_rate": 0.0001801972963098283, | |
| "loss": 0.7919, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.10102115244347191, | |
| "grad_norm": 0.17984451353549957, | |
| "learning_rate": 0.0001801242236024845, | |
| "loss": 0.6746, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.10138584974471189, | |
| "grad_norm": 0.18245050311088562, | |
| "learning_rate": 0.00018005115089514068, | |
| "loss": 0.5672, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.10175054704595186, | |
| "grad_norm": 0.1899084746837616, | |
| "learning_rate": 0.00017997807818779686, | |
| "loss": 0.6676, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.10211524434719182, | |
| "grad_norm": 0.21027837693691254, | |
| "learning_rate": 0.00017990500548045307, | |
| "loss": 0.8676, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.1024799416484318, | |
| "grad_norm": 0.178825244307518, | |
| "learning_rate": 0.00017983193277310925, | |
| "loss": 0.5617, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.10284463894967177, | |
| "grad_norm": 0.18551243841648102, | |
| "learning_rate": 0.00017975886006576546, | |
| "loss": 0.7091, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.10320933625091175, | |
| "grad_norm": 0.18075218796730042, | |
| "learning_rate": 0.00017968578735842164, | |
| "loss": 0.7194, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.10357403355215171, | |
| "grad_norm": 0.1333538293838501, | |
| "learning_rate": 0.00017961271465107782, | |
| "loss": 0.3161, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.10393873085339168, | |
| "grad_norm": 0.18160590529441833, | |
| "learning_rate": 0.00017953964194373403, | |
| "loss": 0.7355, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.10430342815463166, | |
| "grad_norm": 0.19034148752689362, | |
| "learning_rate": 0.00017946656923639024, | |
| "loss": 0.7279, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.10466812545587163, | |
| "grad_norm": 0.18591929972171783, | |
| "learning_rate": 0.0001793934965290464, | |
| "loss": 0.7353, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.1050328227571116, | |
| "grad_norm": 0.19063667953014374, | |
| "learning_rate": 0.0001793204238217026, | |
| "loss": 0.7322, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.10539752005835157, | |
| "grad_norm": 0.20110805332660675, | |
| "learning_rate": 0.0001792473511143588, | |
| "loss": 0.869, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.10576221735959154, | |
| "grad_norm": 0.18062824010849, | |
| "learning_rate": 0.000179174278407015, | |
| "loss": 0.6839, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1061269146608315, | |
| "grad_norm": 0.21643158793449402, | |
| "learning_rate": 0.00017910120569967117, | |
| "loss": 0.7626, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.10649161196207148, | |
| "grad_norm": 0.20745426416397095, | |
| "learning_rate": 0.00017902813299232738, | |
| "loss": 0.8748, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.10685630926331145, | |
| "grad_norm": 0.1999918520450592, | |
| "learning_rate": 0.00017895506028498356, | |
| "loss": 0.7267, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.10722100656455143, | |
| "grad_norm": 0.17721255123615265, | |
| "learning_rate": 0.00017888198757763977, | |
| "loss": 0.788, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.10758570386579139, | |
| "grad_norm": 0.19578172266483307, | |
| "learning_rate": 0.00017880891487029598, | |
| "loss": 0.6801, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.10795040116703136, | |
| "grad_norm": 0.20748168230056763, | |
| "learning_rate": 0.00017873584216295213, | |
| "loss": 0.7624, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.10831509846827134, | |
| "grad_norm": 0.17874249815940857, | |
| "learning_rate": 0.00017866276945560834, | |
| "loss": 0.7029, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.10867979576951131, | |
| "grad_norm": 0.21527762711048126, | |
| "learning_rate": 0.00017858969674826455, | |
| "loss": 0.9119, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.10904449307075127, | |
| "grad_norm": 0.16523544490337372, | |
| "learning_rate": 0.00017851662404092073, | |
| "loss": 0.406, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.10940919037199125, | |
| "grad_norm": 0.18175430595874786, | |
| "learning_rate": 0.0001784435513335769, | |
| "loss": 0.6711, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.10977388767323122, | |
| "grad_norm": 0.19601181149482727, | |
| "learning_rate": 0.00017837047862623312, | |
| "loss": 0.7117, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.11013858497447118, | |
| "grad_norm": 0.1821203976869583, | |
| "learning_rate": 0.0001782974059188893, | |
| "loss": 0.6163, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.11050328227571116, | |
| "grad_norm": 0.1985776126384735, | |
| "learning_rate": 0.0001782243332115455, | |
| "loss": 0.6357, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.11086797957695113, | |
| "grad_norm": 0.18439283967018127, | |
| "learning_rate": 0.0001781512605042017, | |
| "loss": 0.6007, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.1112326768781911, | |
| "grad_norm": 0.19900138676166534, | |
| "learning_rate": 0.00017807818779685787, | |
| "loss": 0.794, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.11159737417943107, | |
| "grad_norm": 0.1659189611673355, | |
| "learning_rate": 0.00017800511508951408, | |
| "loss": 0.4824, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.11196207148067104, | |
| "grad_norm": 0.19788797199726105, | |
| "learning_rate": 0.00017793204238217026, | |
| "loss": 0.6806, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.11232676878191102, | |
| "grad_norm": 0.19467923045158386, | |
| "learning_rate": 0.00017785896967482647, | |
| "loss": 0.7515, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.11269146608315099, | |
| "grad_norm": 0.20231202244758606, | |
| "learning_rate": 0.00017778589696748265, | |
| "loss": 0.7302, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.11305616338439095, | |
| "grad_norm": 0.1800953596830368, | |
| "learning_rate": 0.00017771282426013883, | |
| "loss": 0.5432, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.11342086068563093, | |
| "grad_norm": 0.15572136640548706, | |
| "learning_rate": 0.00017763975155279504, | |
| "loss": 0.4674, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.1137855579868709, | |
| "grad_norm": 0.1948738545179367, | |
| "learning_rate": 0.00017756667884545125, | |
| "loss": 0.8442, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.11415025528811087, | |
| "grad_norm": 0.1789115071296692, | |
| "learning_rate": 0.00017749360613810743, | |
| "loss": 0.6009, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.11451495258935084, | |
| "grad_norm": 0.22927749156951904, | |
| "learning_rate": 0.0001774205334307636, | |
| "loss": 0.866, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.11487964989059081, | |
| "grad_norm": 0.2268659621477127, | |
| "learning_rate": 0.00017734746072341982, | |
| "loss": 0.918, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.11524434719183078, | |
| "grad_norm": 0.14317840337753296, | |
| "learning_rate": 0.000177274388016076, | |
| "loss": 0.3706, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.11560904449307075, | |
| "grad_norm": 0.1896270215511322, | |
| "learning_rate": 0.0001772013153087322, | |
| "loss": 0.7108, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.11597374179431072, | |
| "grad_norm": 0.18470154702663422, | |
| "learning_rate": 0.0001771282426013884, | |
| "loss": 0.8041, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.1163384390955507, | |
| "grad_norm": 0.1888645738363266, | |
| "learning_rate": 0.00017705516989404457, | |
| "loss": 0.7601, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.11670313639679067, | |
| "grad_norm": 0.19202347099781036, | |
| "learning_rate": 0.00017698209718670078, | |
| "loss": 0.6529, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.11706783369803063, | |
| "grad_norm": 0.18936879932880402, | |
| "learning_rate": 0.000176909024479357, | |
| "loss": 0.768, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.1174325309992706, | |
| "grad_norm": 0.18530336022377014, | |
| "learning_rate": 0.00017683595177201314, | |
| "loss": 0.7997, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.11779722830051058, | |
| "grad_norm": 0.1945243924856186, | |
| "learning_rate": 0.00017676287906466935, | |
| "loss": 0.8864, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.11816192560175055, | |
| "grad_norm": 0.16887742280960083, | |
| "learning_rate": 0.00017668980635732556, | |
| "loss": 0.8014, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.11852662290299051, | |
| "grad_norm": 0.20715931057929993, | |
| "learning_rate": 0.00017661673364998174, | |
| "loss": 0.9641, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.11889132020423049, | |
| "grad_norm": 0.18339566886425018, | |
| "learning_rate": 0.00017654366094263792, | |
| "loss": 0.6723, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.11925601750547046, | |
| "grad_norm": 0.1606166958808899, | |
| "learning_rate": 0.00017647058823529413, | |
| "loss": 0.499, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.11962071480671042, | |
| "grad_norm": 0.17019236087799072, | |
| "learning_rate": 0.0001763975155279503, | |
| "loss": 0.7332, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.1199854121079504, | |
| "grad_norm": 0.15148764848709106, | |
| "learning_rate": 0.00017632444282060652, | |
| "loss": 0.477, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.12035010940919037, | |
| "grad_norm": 0.182627871632576, | |
| "learning_rate": 0.00017625137011326273, | |
| "loss": 0.7443, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.12071480671043035, | |
| "grad_norm": 0.18017525970935822, | |
| "learning_rate": 0.00017617829740591888, | |
| "loss": 0.6439, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.12107950401167031, | |
| "grad_norm": 0.1846102774143219, | |
| "learning_rate": 0.0001761052246985751, | |
| "loss": 0.7023, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.12144420131291028, | |
| "grad_norm": 0.19771961867809296, | |
| "learning_rate": 0.0001760321519912313, | |
| "loss": 0.7772, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.12180889861415026, | |
| "grad_norm": 0.18164925277233124, | |
| "learning_rate": 0.00017595907928388748, | |
| "loss": 0.7348, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.12217359591539023, | |
| "grad_norm": 0.182021364569664, | |
| "learning_rate": 0.00017588600657654366, | |
| "loss": 0.7034, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.12253829321663019, | |
| "grad_norm": 0.1674763709306717, | |
| "learning_rate": 0.00017581293386919987, | |
| "loss": 0.5969, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.12290299051787017, | |
| "grad_norm": 0.18998658657073975, | |
| "learning_rate": 0.00017573986116185605, | |
| "loss": 0.723, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.12326768781911014, | |
| "grad_norm": 0.20089052617549896, | |
| "learning_rate": 0.00017566678845451226, | |
| "loss": 0.8216, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.12363238512035012, | |
| "grad_norm": 0.18273848295211792, | |
| "learning_rate": 0.00017559371574716844, | |
| "loss": 0.6634, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.12399708242159008, | |
| "grad_norm": 0.18198589980602264, | |
| "learning_rate": 0.00017552064303982462, | |
| "loss": 0.7026, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.12436177972283005, | |
| "grad_norm": 0.17685921490192413, | |
| "learning_rate": 0.00017544757033248083, | |
| "loss": 0.5641, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.12472647702407003, | |
| "grad_norm": 0.20872358977794647, | |
| "learning_rate": 0.000175374497625137, | |
| "loss": 0.851, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.12509117432531, | |
| "grad_norm": 0.19250471889972687, | |
| "learning_rate": 0.00017530142491779322, | |
| "loss": 0.6265, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.12545587162654998, | |
| "grad_norm": 0.18738161027431488, | |
| "learning_rate": 0.0001752283522104494, | |
| "loss": 0.7612, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.12582056892778992, | |
| "grad_norm": 0.188717782497406, | |
| "learning_rate": 0.00017515527950310558, | |
| "loss": 0.768, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.1261852662290299, | |
| "grad_norm": 0.16923002898693085, | |
| "learning_rate": 0.0001750822067957618, | |
| "loss": 0.6071, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.12654996353026987, | |
| "grad_norm": 0.1732785850763321, | |
| "learning_rate": 0.000175009134088418, | |
| "loss": 0.5788, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.12691466083150985, | |
| "grad_norm": 0.15573865175247192, | |
| "learning_rate": 0.00017493606138107418, | |
| "loss": 0.5516, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.12727935813274982, | |
| "grad_norm": 0.17997129261493683, | |
| "learning_rate": 0.00017486298867373036, | |
| "loss": 0.5627, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.1276440554339898, | |
| "grad_norm": 0.17864130437374115, | |
| "learning_rate": 0.00017478991596638657, | |
| "loss": 0.6458, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.12800875273522977, | |
| "grad_norm": 0.17441441118717194, | |
| "learning_rate": 0.00017471684325904275, | |
| "loss": 0.5879, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.12837345003646974, | |
| "grad_norm": 0.16584378480911255, | |
| "learning_rate": 0.00017464377055169896, | |
| "loss": 0.571, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.1287381473377097, | |
| "grad_norm": 0.16552752256393433, | |
| "learning_rate": 0.00017457069784435514, | |
| "loss": 0.4731, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.12910284463894967, | |
| "grad_norm": 0.19212619960308075, | |
| "learning_rate": 0.00017449762513701132, | |
| "loss": 0.7846, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.12946754194018964, | |
| "grad_norm": 0.15730014443397522, | |
| "learning_rate": 0.00017442455242966753, | |
| "loss": 0.5544, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.12983223924142961, | |
| "grad_norm": 0.16712331771850586, | |
| "learning_rate": 0.00017435147972232374, | |
| "loss": 0.5702, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.1301969365426696, | |
| "grad_norm": 0.19101271033287048, | |
| "learning_rate": 0.0001742784070149799, | |
| "loss": 0.697, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.13056163384390956, | |
| "grad_norm": 0.15471835434436798, | |
| "learning_rate": 0.0001742053343076361, | |
| "loss": 0.4917, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.13092633114514954, | |
| "grad_norm": 0.19589225947856903, | |
| "learning_rate": 0.0001741322616002923, | |
| "loss": 0.7135, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.13129102844638948, | |
| "grad_norm": 0.22023500502109528, | |
| "learning_rate": 0.0001740591888929485, | |
| "loss": 0.8744, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.13165572574762946, | |
| "grad_norm": 0.17470814287662506, | |
| "learning_rate": 0.00017398611618560467, | |
| "loss": 0.5644, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.13202042304886943, | |
| "grad_norm": 0.16508780419826508, | |
| "learning_rate": 0.00017391304347826088, | |
| "loss": 0.4774, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.1323851203501094, | |
| "grad_norm": 0.18133142590522766, | |
| "learning_rate": 0.00017383997077091706, | |
| "loss": 0.6806, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.13274981765134938, | |
| "grad_norm": 0.19952940940856934, | |
| "learning_rate": 0.00017376689806357327, | |
| "loss": 0.7776, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.13311451495258936, | |
| "grad_norm": 0.17585842311382294, | |
| "learning_rate": 0.00017369382535622948, | |
| "loss": 0.7543, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.13347921225382933, | |
| "grad_norm": 0.19367621839046478, | |
| "learning_rate": 0.00017362075264888563, | |
| "loss": 0.585, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.13384390955506928, | |
| "grad_norm": 0.1928108036518097, | |
| "learning_rate": 0.00017354767994154184, | |
| "loss": 0.6656, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.13420860685630925, | |
| "grad_norm": 0.19390811026096344, | |
| "learning_rate": 0.00017347460723419805, | |
| "loss": 0.713, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.13457330415754923, | |
| "grad_norm": 0.19284166395664215, | |
| "learning_rate": 0.00017340153452685423, | |
| "loss": 0.7414, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.1349380014587892, | |
| "grad_norm": 0.16773255169391632, | |
| "learning_rate": 0.0001733284618195104, | |
| "loss": 0.5786, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.13530269876002918, | |
| "grad_norm": 0.15630416572093964, | |
| "learning_rate": 0.00017325538911216662, | |
| "loss": 0.4774, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.13566739606126915, | |
| "grad_norm": 0.1833135038614273, | |
| "learning_rate": 0.0001731823164048228, | |
| "loss": 0.7178, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.13603209336250913, | |
| "grad_norm": 0.19849684834480286, | |
| "learning_rate": 0.000173109243697479, | |
| "loss": 0.7354, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.1363967906637491, | |
| "grad_norm": 0.17072516679763794, | |
| "learning_rate": 0.0001730361709901352, | |
| "loss": 0.5945, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.13676148796498905, | |
| "grad_norm": 0.1784793585538864, | |
| "learning_rate": 0.00017296309828279137, | |
| "loss": 0.6322, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.13712618526622902, | |
| "grad_norm": 0.192670539021492, | |
| "learning_rate": 0.00017289002557544758, | |
| "loss": 0.7612, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.137490882567469, | |
| "grad_norm": 0.1875181794166565, | |
| "learning_rate": 0.00017281695286810376, | |
| "loss": 0.7439, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.13785557986870897, | |
| "grad_norm": 0.1796884834766388, | |
| "learning_rate": 0.00017274388016075997, | |
| "loss": 0.8694, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.13822027716994895, | |
| "grad_norm": 0.20059190690517426, | |
| "learning_rate": 0.00017267080745341615, | |
| "loss": 0.8027, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.13858497447118892, | |
| "grad_norm": 0.19425062835216522, | |
| "learning_rate": 0.00017259773474607236, | |
| "loss": 0.8948, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.1389496717724289, | |
| "grad_norm": 0.20872867107391357, | |
| "learning_rate": 0.00017252466203872854, | |
| "loss": 0.9552, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.13931436907366884, | |
| "grad_norm": 0.1698973923921585, | |
| "learning_rate": 0.00017245158933138475, | |
| "loss": 0.5604, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.13967906637490882, | |
| "grad_norm": 0.19797375798225403, | |
| "learning_rate": 0.00017237851662404093, | |
| "loss": 0.8872, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.1400437636761488, | |
| "grad_norm": 0.16452281177043915, | |
| "learning_rate": 0.00017230544391669711, | |
| "loss": 0.5724, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.14040846097738877, | |
| "grad_norm": 0.17133468389511108, | |
| "learning_rate": 0.00017223237120935332, | |
| "loss": 0.592, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.14077315827862874, | |
| "grad_norm": 0.17517192661762238, | |
| "learning_rate": 0.0001721592985020095, | |
| "loss": 0.5597, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.14113785557986872, | |
| "grad_norm": 0.17406406998634338, | |
| "learning_rate": 0.00017208622579466569, | |
| "loss": 0.7118, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.1415025528811087, | |
| "grad_norm": 0.17395231127738953, | |
| "learning_rate": 0.0001720131530873219, | |
| "loss": 0.6616, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.14186725018234866, | |
| "grad_norm": 0.21478557586669922, | |
| "learning_rate": 0.00017194008037997807, | |
| "loss": 0.8517, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.1422319474835886, | |
| "grad_norm": 0.22842730581760406, | |
| "learning_rate": 0.00017186700767263428, | |
| "loss": 0.9611, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.14259664478482859, | |
| "grad_norm": 0.18962237238883972, | |
| "learning_rate": 0.0001717939349652905, | |
| "loss": 0.7606, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.14296134208606856, | |
| "grad_norm": 0.19581514596939087, | |
| "learning_rate": 0.00017172086225794665, | |
| "loss": 0.892, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.14332603938730853, | |
| "grad_norm": 0.18178801238536835, | |
| "learning_rate": 0.00017164778955060285, | |
| "loss": 0.7477, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.1436907366885485, | |
| "grad_norm": 0.18961721658706665, | |
| "learning_rate": 0.00017157471684325906, | |
| "loss": 0.771, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.14405543398978848, | |
| "grad_norm": 0.18671804666519165, | |
| "learning_rate": 0.00017150164413591524, | |
| "loss": 0.7383, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.14442013129102846, | |
| "grad_norm": 0.17833957076072693, | |
| "learning_rate": 0.00017142857142857143, | |
| "loss": 0.6363, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.1447848285922684, | |
| "grad_norm": 0.18321546912193298, | |
| "learning_rate": 0.00017135549872122763, | |
| "loss": 0.7068, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.14514952589350838, | |
| "grad_norm": 0.20233039557933807, | |
| "learning_rate": 0.00017128242601388381, | |
| "loss": 0.8699, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.14551422319474835, | |
| "grad_norm": 0.1749749630689621, | |
| "learning_rate": 0.00017120935330654002, | |
| "loss": 0.7355, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.14587892049598833, | |
| "grad_norm": 0.14458586275577545, | |
| "learning_rate": 0.00017113628059919623, | |
| "loss": 0.427, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1462436177972283, | |
| "grad_norm": 0.183969646692276, | |
| "learning_rate": 0.00017106320789185239, | |
| "loss": 0.6166, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.14660831509846828, | |
| "grad_norm": 0.15677833557128906, | |
| "learning_rate": 0.0001709901351845086, | |
| "loss": 0.4161, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.14697301239970825, | |
| "grad_norm": 0.20313799381256104, | |
| "learning_rate": 0.0001709170624771648, | |
| "loss": 0.8189, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.1473377097009482, | |
| "grad_norm": 0.17821913957595825, | |
| "learning_rate": 0.00017084398976982098, | |
| "loss": 0.6365, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.14770240700218817, | |
| "grad_norm": 0.1630881130695343, | |
| "learning_rate": 0.00017077091706247717, | |
| "loss": 0.5623, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.14806710430342815, | |
| "grad_norm": 0.20716772973537445, | |
| "learning_rate": 0.00017069784435513337, | |
| "loss": 0.9347, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.14843180160466812, | |
| "grad_norm": 0.17428599298000336, | |
| "learning_rate": 0.00017062477164778956, | |
| "loss": 0.6188, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.1487964989059081, | |
| "grad_norm": 0.15823887288570404, | |
| "learning_rate": 0.00017055169894044576, | |
| "loss": 0.5995, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.14916119620714807, | |
| "grad_norm": 0.1716741919517517, | |
| "learning_rate": 0.00017047862623310194, | |
| "loss": 0.6407, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.14952589350838805, | |
| "grad_norm": 0.16747424006462097, | |
| "learning_rate": 0.00017040555352575813, | |
| "loss": 0.5822, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.14989059080962802, | |
| "grad_norm": 0.14587102830410004, | |
| "learning_rate": 0.00017033248081841433, | |
| "loss": 0.431, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.15025528811086797, | |
| "grad_norm": 0.1754886656999588, | |
| "learning_rate": 0.00017025940811107052, | |
| "loss": 0.5515, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.15061998541210794, | |
| "grad_norm": 0.20467835664749146, | |
| "learning_rate": 0.00017018633540372672, | |
| "loss": 0.8614, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.15098468271334792, | |
| "grad_norm": 0.2041863650083542, | |
| "learning_rate": 0.0001701132626963829, | |
| "loss": 0.88, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.1513493800145879, | |
| "grad_norm": 0.2153850644826889, | |
| "learning_rate": 0.00017004018998903911, | |
| "loss": 0.9241, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.15171407731582787, | |
| "grad_norm": 0.20441819727420807, | |
| "learning_rate": 0.0001699671172816953, | |
| "loss": 0.9127, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.15207877461706784, | |
| "grad_norm": 0.17975957691669464, | |
| "learning_rate": 0.0001698940445743515, | |
| "loss": 0.6395, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.15244347191830782, | |
| "grad_norm": 0.1702582836151123, | |
| "learning_rate": 0.00016982097186700768, | |
| "loss": 0.5951, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.15280816921954776, | |
| "grad_norm": 0.1543644517660141, | |
| "learning_rate": 0.00016974789915966387, | |
| "loss": 0.5093, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.15317286652078774, | |
| "grad_norm": 0.1747189462184906, | |
| "learning_rate": 0.00016967482645232007, | |
| "loss": 0.5314, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.1535375638220277, | |
| "grad_norm": 0.22516067326068878, | |
| "learning_rate": 0.00016960175374497626, | |
| "loss": 0.9766, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.1539022611232677, | |
| "grad_norm": 0.21535086631774902, | |
| "learning_rate": 0.00016952868103763244, | |
| "loss": 0.8287, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.15426695842450766, | |
| "grad_norm": 0.17004358768463135, | |
| "learning_rate": 0.00016945560833028865, | |
| "loss": 0.6612, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.15463165572574764, | |
| "grad_norm": 0.18636055290699005, | |
| "learning_rate": 0.00016938253562294483, | |
| "loss": 0.784, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.1549963530269876, | |
| "grad_norm": 0.1781081259250641, | |
| "learning_rate": 0.00016930946291560104, | |
| "loss": 0.5275, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.15536105032822758, | |
| "grad_norm": 0.19579234719276428, | |
| "learning_rate": 0.00016923639020825724, | |
| "loss": 0.7712, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.15572574762946753, | |
| "grad_norm": 0.18568329513072968, | |
| "learning_rate": 0.0001691633175009134, | |
| "loss": 0.7509, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.1560904449307075, | |
| "grad_norm": 0.17547035217285156, | |
| "learning_rate": 0.0001690902447935696, | |
| "loss": 0.711, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.15645514223194748, | |
| "grad_norm": 0.19599728286266327, | |
| "learning_rate": 0.00016901717208622581, | |
| "loss": 0.9256, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.15681983953318746, | |
| "grad_norm": 0.18903636932373047, | |
| "learning_rate": 0.000168944099378882, | |
| "loss": 0.7167, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.15718453683442743, | |
| "grad_norm": 0.14108788967132568, | |
| "learning_rate": 0.00016887102667153818, | |
| "loss": 0.4165, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.1575492341356674, | |
| "grad_norm": 0.14891500771045685, | |
| "learning_rate": 0.00016879795396419439, | |
| "loss": 0.4908, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.15791393143690738, | |
| "grad_norm": 0.18601083755493164, | |
| "learning_rate": 0.00016872488125685057, | |
| "loss": 0.6856, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.15827862873814733, | |
| "grad_norm": 0.1775410771369934, | |
| "learning_rate": 0.00016865180854950678, | |
| "loss": 0.6805, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.1586433260393873, | |
| "grad_norm": 0.19800400733947754, | |
| "learning_rate": 0.00016857873584216296, | |
| "loss": 0.7518, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.15900802334062727, | |
| "grad_norm": 0.21951526403427124, | |
| "learning_rate": 0.00016850566313481914, | |
| "loss": 0.9934, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.15937272064186725, | |
| "grad_norm": 0.15916769206523895, | |
| "learning_rate": 0.00016843259042747535, | |
| "loss": 0.5548, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.15973741794310722, | |
| "grad_norm": 0.20369085669517517, | |
| "learning_rate": 0.00016835951772013155, | |
| "loss": 0.876, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.1601021152443472, | |
| "grad_norm": 0.16725899279117584, | |
| "learning_rate": 0.00016828644501278774, | |
| "loss": 0.6011, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.16046681254558717, | |
| "grad_norm": 0.15776625275611877, | |
| "learning_rate": 0.00016821337230544392, | |
| "loss": 0.4744, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.16083150984682712, | |
| "grad_norm": 0.20461910963058472, | |
| "learning_rate": 0.00016814029959810013, | |
| "loss": 0.8574, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.1611962071480671, | |
| "grad_norm": 0.1932179033756256, | |
| "learning_rate": 0.0001680672268907563, | |
| "loss": 0.7493, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.16156090444930707, | |
| "grad_norm": 0.17809215188026428, | |
| "learning_rate": 0.00016799415418341252, | |
| "loss": 0.6159, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.16192560175054704, | |
| "grad_norm": 0.19475284218788147, | |
| "learning_rate": 0.0001679210814760687, | |
| "loss": 0.7342, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.16229029905178702, | |
| "grad_norm": 0.17827290296554565, | |
| "learning_rate": 0.00016784800876872488, | |
| "loss": 0.7018, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.162654996353027, | |
| "grad_norm": 0.18410883843898773, | |
| "learning_rate": 0.0001677749360613811, | |
| "loss": 0.689, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.16301969365426697, | |
| "grad_norm": 0.161324605345726, | |
| "learning_rate": 0.00016770186335403727, | |
| "loss": 0.5249, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.16338439095550694, | |
| "grad_norm": 0.1743507832288742, | |
| "learning_rate": 0.00016762879064669348, | |
| "loss": 0.7147, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.1637490882567469, | |
| "grad_norm": 0.18522591888904572, | |
| "learning_rate": 0.00016755571793934966, | |
| "loss": 0.6813, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.16411378555798686, | |
| "grad_norm": 0.18846698105335236, | |
| "learning_rate": 0.00016748264523200587, | |
| "loss": 0.8582, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.16447848285922684, | |
| "grad_norm": 0.18033850193023682, | |
| "learning_rate": 0.00016740957252466205, | |
| "loss": 0.7465, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.1648431801604668, | |
| "grad_norm": 0.1762772500514984, | |
| "learning_rate": 0.00016733649981731826, | |
| "loss": 0.6501, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.1652078774617068, | |
| "grad_norm": 0.20865213871002197, | |
| "learning_rate": 0.00016726342710997444, | |
| "loss": 0.5579, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.16557257476294676, | |
| "grad_norm": 0.20231905579566956, | |
| "learning_rate": 0.00016719035440263062, | |
| "loss": 0.8055, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.16593727206418674, | |
| "grad_norm": 0.1755821406841278, | |
| "learning_rate": 0.00016711728169528683, | |
| "loss": 0.6538, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.16630196936542668, | |
| "grad_norm": 0.18525098264217377, | |
| "learning_rate": 0.000167044208987943, | |
| "loss": 0.6979, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 0.18403197824954987, | |
| "learning_rate": 0.0001669711362805992, | |
| "loss": 0.8222, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.16703136396790663, | |
| "grad_norm": 0.1567261666059494, | |
| "learning_rate": 0.0001668980635732554, | |
| "loss": 0.5104, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.1673960612691466, | |
| "grad_norm": 0.1938832849264145, | |
| "learning_rate": 0.00016682499086591158, | |
| "loss": 0.6972, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.16776075857038658, | |
| "grad_norm": 0.19050729274749756, | |
| "learning_rate": 0.0001667519181585678, | |
| "loss": 0.7917, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.16812545587162656, | |
| "grad_norm": 0.22484062612056732, | |
| "learning_rate": 0.000166678845451224, | |
| "loss": 1.0339, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.16849015317286653, | |
| "grad_norm": 0.18188448250293732, | |
| "learning_rate": 0.00016660577274388015, | |
| "loss": 0.6295, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.1688548504741065, | |
| "grad_norm": 0.17250071465969086, | |
| "learning_rate": 0.00016653270003653636, | |
| "loss": 0.5911, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.16921954777534645, | |
| "grad_norm": 0.177398219704628, | |
| "learning_rate": 0.00016645962732919257, | |
| "loss": 0.6524, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.16958424507658643, | |
| "grad_norm": 0.18105994164943695, | |
| "learning_rate": 0.00016638655462184875, | |
| "loss": 0.6554, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.1699489423778264, | |
| "grad_norm": 0.16810593008995056, | |
| "learning_rate": 0.00016631348191450493, | |
| "loss": 0.5617, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.17031363967906638, | |
| "grad_norm": 0.18866056203842163, | |
| "learning_rate": 0.00016624040920716114, | |
| "loss": 0.848, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.17067833698030635, | |
| "grad_norm": 0.18827949464321136, | |
| "learning_rate": 0.00016616733649981732, | |
| "loss": 0.6413, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.17104303428154632, | |
| "grad_norm": 0.16476339101791382, | |
| "learning_rate": 0.00016609426379247353, | |
| "loss": 0.6205, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.1714077315827863, | |
| "grad_norm": 0.1794668436050415, | |
| "learning_rate": 0.0001660211910851297, | |
| "loss": 0.557, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.17177242888402625, | |
| "grad_norm": 0.18404535949230194, | |
| "learning_rate": 0.0001659481183777859, | |
| "loss": 0.7329, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.17213712618526622, | |
| "grad_norm": 0.1756718009710312, | |
| "learning_rate": 0.0001658750456704421, | |
| "loss": 0.5631, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.1725018234865062, | |
| "grad_norm": 0.17083744704723358, | |
| "learning_rate": 0.0001658019729630983, | |
| "loss": 0.6473, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.17286652078774617, | |
| "grad_norm": 0.17385685443878174, | |
| "learning_rate": 0.0001657289002557545, | |
| "loss": 0.6402, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.17323121808898614, | |
| "grad_norm": 0.16864272952079773, | |
| "learning_rate": 0.00016565582754841067, | |
| "loss": 0.5569, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.17359591539022612, | |
| "grad_norm": 0.1955600082874298, | |
| "learning_rate": 0.00016558275484106688, | |
| "loss": 0.8456, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.1739606126914661, | |
| "grad_norm": 0.1845158338546753, | |
| "learning_rate": 0.00016550968213372306, | |
| "loss": 0.6691, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.17432530999270604, | |
| "grad_norm": 0.14487138390541077, | |
| "learning_rate": 0.00016543660942637927, | |
| "loss": 0.3899, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.17469000729394601, | |
| "grad_norm": 0.1681089848279953, | |
| "learning_rate": 0.00016536353671903545, | |
| "loss": 0.5527, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.175054704595186, | |
| "grad_norm": 0.1934751272201538, | |
| "learning_rate": 0.00016529046401169163, | |
| "loss": 0.764, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.17541940189642596, | |
| "grad_norm": 0.17804409563541412, | |
| "learning_rate": 0.00016521739130434784, | |
| "loss": 0.6075, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.17578409919766594, | |
| "grad_norm": 0.17334255576133728, | |
| "learning_rate": 0.00016514431859700402, | |
| "loss": 0.5402, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.1761487964989059, | |
| "grad_norm": 0.166969433426857, | |
| "learning_rate": 0.0001650712458896602, | |
| "loss": 0.5684, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.1765134938001459, | |
| "grad_norm": 0.17517952620983124, | |
| "learning_rate": 0.0001649981731823164, | |
| "loss": 0.6743, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.17687819110138586, | |
| "grad_norm": 0.18539521098136902, | |
| "learning_rate": 0.00016492510047497262, | |
| "loss": 0.7196, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.1772428884026258, | |
| "grad_norm": 0.18387848138809204, | |
| "learning_rate": 0.0001648520277676288, | |
| "loss": 0.6336, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.17760758570386578, | |
| "grad_norm": 0.18015360832214355, | |
| "learning_rate": 0.000164778955060285, | |
| "loss": 0.7254, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.17797228300510576, | |
| "grad_norm": 0.1397644728422165, | |
| "learning_rate": 0.0001647058823529412, | |
| "loss": 0.3431, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.17833698030634573, | |
| "grad_norm": 0.22470806539058685, | |
| "learning_rate": 0.00016463280964559737, | |
| "loss": 0.9921, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.1787016776075857, | |
| "grad_norm": 0.2027674913406372, | |
| "learning_rate": 0.00016455973693825358, | |
| "loss": 0.8432, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.17906637490882568, | |
| "grad_norm": 0.16964022815227509, | |
| "learning_rate": 0.00016448666423090976, | |
| "loss": 0.659, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.17943107221006566, | |
| "grad_norm": 0.18222148716449738, | |
| "learning_rate": 0.00016441359152356594, | |
| "loss": 0.6367, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.1797957695113056, | |
| "grad_norm": 0.19195525348186493, | |
| "learning_rate": 0.00016434051881622215, | |
| "loss": 0.657, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.18016046681254558, | |
| "grad_norm": 0.17382116615772247, | |
| "learning_rate": 0.00016426744610887833, | |
| "loss": 0.6367, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.18052516411378555, | |
| "grad_norm": 0.18641424179077148, | |
| "learning_rate": 0.00016419437340153454, | |
| "loss": 0.7523, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.18088986141502553, | |
| "grad_norm": 0.1832839697599411, | |
| "learning_rate": 0.00016412130069419075, | |
| "loss": 0.7381, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.1812545587162655, | |
| "grad_norm": 0.1541210114955902, | |
| "learning_rate": 0.0001640482279868469, | |
| "loss": 0.5941, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.18161925601750548, | |
| "grad_norm": 0.22230711579322815, | |
| "learning_rate": 0.0001639751552795031, | |
| "loss": 0.9853, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.18198395331874545, | |
| "grad_norm": 0.17912793159484863, | |
| "learning_rate": 0.00016390208257215932, | |
| "loss": 0.6753, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.18234865061998543, | |
| "grad_norm": 0.1971275359392166, | |
| "learning_rate": 0.0001638290098648155, | |
| "loss": 0.7659, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.18271334792122537, | |
| "grad_norm": 0.19420406222343445, | |
| "learning_rate": 0.00016375593715747168, | |
| "loss": 0.7616, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.18307804522246535, | |
| "grad_norm": 0.16759932041168213, | |
| "learning_rate": 0.0001636828644501279, | |
| "loss": 0.5594, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.18344274252370532, | |
| "grad_norm": 0.1994057297706604, | |
| "learning_rate": 0.00016360979174278407, | |
| "loss": 0.8499, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.1838074398249453, | |
| "grad_norm": 0.1942475587129593, | |
| "learning_rate": 0.00016353671903544028, | |
| "loss": 0.8477, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.18417213712618527, | |
| "grad_norm": 0.20013171434402466, | |
| "learning_rate": 0.00016346364632809646, | |
| "loss": 0.8432, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.18453683442742524, | |
| "grad_norm": 0.19940058887004852, | |
| "learning_rate": 0.00016339057362075264, | |
| "loss": 0.9063, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.18490153172866522, | |
| "grad_norm": 0.17371918261051178, | |
| "learning_rate": 0.00016331750091340885, | |
| "loss": 0.755, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.18526622902990517, | |
| "grad_norm": 0.1687958985567093, | |
| "learning_rate": 0.00016324442820606506, | |
| "loss": 0.6164, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.18563092633114514, | |
| "grad_norm": 0.1572989672422409, | |
| "learning_rate": 0.00016317135549872124, | |
| "loss": 0.5366, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.18599562363238512, | |
| "grad_norm": 0.1621757596731186, | |
| "learning_rate": 0.00016309828279137742, | |
| "loss": 0.572, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.1863603209336251, | |
| "grad_norm": 0.16226086020469666, | |
| "learning_rate": 0.00016302521008403363, | |
| "loss": 0.6112, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.18672501823486506, | |
| "grad_norm": 0.19324566423892975, | |
| "learning_rate": 0.0001629521373766898, | |
| "loss": 0.8287, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.18708971553610504, | |
| "grad_norm": 0.18370205163955688, | |
| "learning_rate": 0.00016287906466934602, | |
| "loss": 0.7856, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.187454412837345, | |
| "grad_norm": 0.22035083174705505, | |
| "learning_rate": 0.0001628059919620022, | |
| "loss": 0.9329, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.187819110138585, | |
| "grad_norm": 0.1782984435558319, | |
| "learning_rate": 0.00016273291925465838, | |
| "loss": 0.8342, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.18818380743982493, | |
| "grad_norm": 0.23118692636489868, | |
| "learning_rate": 0.0001626598465473146, | |
| "loss": 0.9808, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.1885485047410649, | |
| "grad_norm": 0.1947745978832245, | |
| "learning_rate": 0.00016258677383997077, | |
| "loss": 0.8156, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.18891320204230488, | |
| "grad_norm": 0.18419331312179565, | |
| "learning_rate": 0.00016251370113262695, | |
| "loss": 0.7476, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.18927789934354486, | |
| "grad_norm": 0.1725061982870102, | |
| "learning_rate": 0.00016244062842528316, | |
| "loss": 0.646, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.18964259664478483, | |
| "grad_norm": 0.17075173556804657, | |
| "learning_rate": 0.00016236755571793937, | |
| "loss": 0.5723, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.1900072939460248, | |
| "grad_norm": 0.17799945175647736, | |
| "learning_rate": 0.00016229448301059555, | |
| "loss": 0.8289, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.19037199124726478, | |
| "grad_norm": 0.18537406623363495, | |
| "learning_rate": 0.00016222141030325176, | |
| "loss": 0.8107, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.19073668854850473, | |
| "grad_norm": 0.20047864317893982, | |
| "learning_rate": 0.00016214833759590794, | |
| "loss": 0.9138, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.1911013858497447, | |
| "grad_norm": 0.20184080302715302, | |
| "learning_rate": 0.00016207526488856412, | |
| "loss": 0.8424, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.19146608315098468, | |
| "grad_norm": 0.2016812413930893, | |
| "learning_rate": 0.00016200219218122033, | |
| "loss": 0.971, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.19183078045222465, | |
| "grad_norm": 0.17220567166805267, | |
| "learning_rate": 0.0001619291194738765, | |
| "loss": 0.6097, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.19219547775346463, | |
| "grad_norm": 0.16681768000125885, | |
| "learning_rate": 0.0001618560467665327, | |
| "loss": 0.6361, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.1925601750547046, | |
| "grad_norm": 0.1976706087589264, | |
| "learning_rate": 0.0001617829740591889, | |
| "loss": 0.8039, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.19292487235594458, | |
| "grad_norm": 0.14784552156925201, | |
| "learning_rate": 0.00016170990135184508, | |
| "loss": 0.4833, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.19328956965718452, | |
| "grad_norm": 0.18679118156433105, | |
| "learning_rate": 0.0001616368286445013, | |
| "loss": 0.8576, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.1936542669584245, | |
| "grad_norm": 0.1557287573814392, | |
| "learning_rate": 0.0001615637559371575, | |
| "loss": 0.5705, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.19401896425966447, | |
| "grad_norm": 0.19311803579330444, | |
| "learning_rate": 0.00016149068322981365, | |
| "loss": 0.7767, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.19438366156090445, | |
| "grad_norm": 0.14209668338298798, | |
| "learning_rate": 0.00016141761052246986, | |
| "loss": 0.4313, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.19474835886214442, | |
| "grad_norm": 0.15875539183616638, | |
| "learning_rate": 0.00016134453781512607, | |
| "loss": 0.5665, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.1951130561633844, | |
| "grad_norm": 0.17052525281906128, | |
| "learning_rate": 0.00016127146510778225, | |
| "loss": 0.507, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.19547775346462437, | |
| "grad_norm": 0.19006414711475372, | |
| "learning_rate": 0.00016119839240043843, | |
| "loss": 0.6866, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.19584245076586435, | |
| "grad_norm": 0.1550920307636261, | |
| "learning_rate": 0.00016112531969309464, | |
| "loss": 0.537, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.1962071480671043, | |
| "grad_norm": 0.17453649640083313, | |
| "learning_rate": 0.00016105224698575082, | |
| "loss": 0.5476, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.19657184536834427, | |
| "grad_norm": 0.1762418895959854, | |
| "learning_rate": 0.00016097917427840703, | |
| "loss": 0.7063, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.19693654266958424, | |
| "grad_norm": 0.1798284500837326, | |
| "learning_rate": 0.0001609061015710632, | |
| "loss": 0.6109, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.19730123997082422, | |
| "grad_norm": 0.18691901862621307, | |
| "learning_rate": 0.0001608330288637194, | |
| "loss": 0.7341, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.1976659372720642, | |
| "grad_norm": 0.21478383243083954, | |
| "learning_rate": 0.0001607599561563756, | |
| "loss": 0.8744, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.19803063457330417, | |
| "grad_norm": 0.17121157050132751, | |
| "learning_rate": 0.0001606868834490318, | |
| "loss": 0.54, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.19839533187454414, | |
| "grad_norm": 0.1967833936214447, | |
| "learning_rate": 0.000160613810741688, | |
| "loss": 0.5669, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.1987600291757841, | |
| "grad_norm": 0.16104774177074432, | |
| "learning_rate": 0.00016054073803434417, | |
| "loss": 0.4524, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.19912472647702406, | |
| "grad_norm": 0.17480875551700592, | |
| "learning_rate": 0.00016046766532700038, | |
| "loss": 0.6578, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.19948942377826404, | |
| "grad_norm": 0.16630177199840546, | |
| "learning_rate": 0.00016039459261965656, | |
| "loss": 0.6371, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.199854121079504, | |
| "grad_norm": 0.18494483828544617, | |
| "learning_rate": 0.00016032151991231277, | |
| "loss": 0.7218, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.20021881838074398, | |
| "grad_norm": 0.17873701453208923, | |
| "learning_rate": 0.00016024844720496895, | |
| "loss": 0.6248, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.20058351568198396, | |
| "grad_norm": 0.16306859254837036, | |
| "learning_rate": 0.00016017537449762513, | |
| "loss": 0.5553, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.20094821298322393, | |
| "grad_norm": 0.1972372829914093, | |
| "learning_rate": 0.00016010230179028134, | |
| "loss": 0.8172, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.2013129102844639, | |
| "grad_norm": 0.21976488828659058, | |
| "learning_rate": 0.00016002922908293752, | |
| "loss": 1.0276, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.20167760758570386, | |
| "grad_norm": 0.16156013309955597, | |
| "learning_rate": 0.0001599561563755937, | |
| "loss": 0.5599, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.20204230488694383, | |
| "grad_norm": 0.18124879896640778, | |
| "learning_rate": 0.00015988308366824991, | |
| "loss": 0.6038, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.2024070021881838, | |
| "grad_norm": 0.16870321333408356, | |
| "learning_rate": 0.00015981001096090612, | |
| "loss": 0.6956, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.20277169948942378, | |
| "grad_norm": 0.18470539152622223, | |
| "learning_rate": 0.0001597369382535623, | |
| "loss": 0.7895, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.20313639679066375, | |
| "grad_norm": 0.1810760498046875, | |
| "learning_rate": 0.0001596638655462185, | |
| "loss": 0.7416, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.20350109409190373, | |
| "grad_norm": 0.19518287479877472, | |
| "learning_rate": 0.0001595907928388747, | |
| "loss": 0.8724, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.2038657913931437, | |
| "grad_norm": 0.16267555952072144, | |
| "learning_rate": 0.00015951772013153087, | |
| "loss": 0.6208, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.20423048869438365, | |
| "grad_norm": 0.19488964974880219, | |
| "learning_rate": 0.00015944464742418708, | |
| "loss": 0.8636, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.20459518599562362, | |
| "grad_norm": 0.1762942373752594, | |
| "learning_rate": 0.00015937157471684326, | |
| "loss": 0.6351, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.2049598832968636, | |
| "grad_norm": 0.1685190051794052, | |
| "learning_rate": 0.00015929850200949945, | |
| "loss": 0.7862, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.20532458059810357, | |
| "grad_norm": 0.17033015191555023, | |
| "learning_rate": 0.00015922542930215565, | |
| "loss": 0.7065, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.20568927789934355, | |
| "grad_norm": 0.1639910638332367, | |
| "learning_rate": 0.00015915235659481184, | |
| "loss": 0.5806, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.20605397520058352, | |
| "grad_norm": 0.16588236391544342, | |
| "learning_rate": 0.00015907928388746804, | |
| "loss": 0.639, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.2064186725018235, | |
| "grad_norm": 0.1781357228755951, | |
| "learning_rate": 0.00015900621118012423, | |
| "loss": 0.7193, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.20678336980306344, | |
| "grad_norm": 0.2006876915693283, | |
| "learning_rate": 0.0001589331384727804, | |
| "loss": 0.8255, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.20714806710430342, | |
| "grad_norm": 0.24458666145801544, | |
| "learning_rate": 0.00015886006576543661, | |
| "loss": 0.7577, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.2075127644055434, | |
| "grad_norm": 0.18870210647583008, | |
| "learning_rate": 0.00015878699305809282, | |
| "loss": 0.7712, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.20787746170678337, | |
| "grad_norm": 0.1800808161497116, | |
| "learning_rate": 0.000158713920350749, | |
| "loss": 0.5715, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.20824215900802334, | |
| "grad_norm": 0.16994856297969818, | |
| "learning_rate": 0.00015864084764340519, | |
| "loss": 0.6102, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.20860685630926332, | |
| "grad_norm": 0.21051564812660217, | |
| "learning_rate": 0.0001585677749360614, | |
| "loss": 0.9531, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.2089715536105033, | |
| "grad_norm": 0.17979969084262848, | |
| "learning_rate": 0.00015849470222871758, | |
| "loss": 0.6933, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.20933625091174327, | |
| "grad_norm": 0.17062553763389587, | |
| "learning_rate": 0.00015842162952137378, | |
| "loss": 0.5809, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.2097009482129832, | |
| "grad_norm": 0.1818009465932846, | |
| "learning_rate": 0.00015834855681402997, | |
| "loss": 0.6487, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.2100656455142232, | |
| "grad_norm": 0.19140908122062683, | |
| "learning_rate": 0.00015827548410668615, | |
| "loss": 0.7887, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.21043034281546316, | |
| "grad_norm": 0.15934517979621887, | |
| "learning_rate": 0.00015820241139934236, | |
| "loss": 0.5761, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.21079504011670314, | |
| "grad_norm": 0.16064375638961792, | |
| "learning_rate": 0.00015812933869199856, | |
| "loss": 0.5708, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.2111597374179431, | |
| "grad_norm": 0.18466386198997498, | |
| "learning_rate": 0.00015805626598465474, | |
| "loss": 0.7036, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.21152443471918309, | |
| "grad_norm": 0.17533251643180847, | |
| "learning_rate": 0.00015798319327731093, | |
| "loss": 0.801, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.21188913202042306, | |
| "grad_norm": 0.1585182249546051, | |
| "learning_rate": 0.00015791012056996713, | |
| "loss": 0.5439, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.212253829321663, | |
| "grad_norm": 0.18301182985305786, | |
| "learning_rate": 0.00015783704786262332, | |
| "loss": 0.6721, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.21261852662290298, | |
| "grad_norm": 0.19599053263664246, | |
| "learning_rate": 0.00015776397515527952, | |
| "loss": 0.7901, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.21298322392414296, | |
| "grad_norm": 0.19529075920581818, | |
| "learning_rate": 0.0001576909024479357, | |
| "loss": 0.7955, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.21334792122538293, | |
| "grad_norm": 0.17965692281723022, | |
| "learning_rate": 0.0001576178297405919, | |
| "loss": 0.6761, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.2137126185266229, | |
| "grad_norm": 0.15989278256893158, | |
| "learning_rate": 0.0001575447570332481, | |
| "loss": 0.5406, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.21407731582786288, | |
| "grad_norm": 0.18571379780769348, | |
| "learning_rate": 0.00015747168432590428, | |
| "loss": 0.784, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.21444201312910285, | |
| "grad_norm": 0.17124933004379272, | |
| "learning_rate": 0.00015739861161856046, | |
| "loss": 0.6769, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.21480671043034283, | |
| "grad_norm": 0.2020951509475708, | |
| "learning_rate": 0.00015732553891121667, | |
| "loss": 0.9287, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.21517140773158278, | |
| "grad_norm": 0.1693318635225296, | |
| "learning_rate": 0.00015725246620387287, | |
| "loss": 0.6093, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.21553610503282275, | |
| "grad_norm": 0.1748688966035843, | |
| "learning_rate": 0.00015717939349652906, | |
| "loss": 0.6981, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.21590080233406272, | |
| "grad_norm": 0.15914174914360046, | |
| "learning_rate": 0.00015710632078918526, | |
| "loss": 0.5255, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.2162654996353027, | |
| "grad_norm": 0.16605016589164734, | |
| "learning_rate": 0.00015703324808184145, | |
| "loss": 0.613, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.21663019693654267, | |
| "grad_norm": 0.13033540546894073, | |
| "learning_rate": 0.00015696017537449763, | |
| "loss": 0.2626, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.21699489423778265, | |
| "grad_norm": 0.17081047594547272, | |
| "learning_rate": 0.00015688710266715384, | |
| "loss": 0.6266, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.21735959153902262, | |
| "grad_norm": 0.18738578259944916, | |
| "learning_rate": 0.00015681402995981002, | |
| "loss": 0.7505, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.21772428884026257, | |
| "grad_norm": 0.18220360577106476, | |
| "learning_rate": 0.0001567409572524662, | |
| "loss": 0.6359, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.21808898614150254, | |
| "grad_norm": 0.12919798493385315, | |
| "learning_rate": 0.0001566678845451224, | |
| "loss": 0.2671, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.21845368344274252, | |
| "grad_norm": 0.19884516298770905, | |
| "learning_rate": 0.0001565948118377786, | |
| "loss": 0.7276, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.2188183807439825, | |
| "grad_norm": 0.21019931137561798, | |
| "learning_rate": 0.0001565217391304348, | |
| "loss": 0.8272, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.21918307804522247, | |
| "grad_norm": 0.18614532053470612, | |
| "learning_rate": 0.00015644866642309098, | |
| "loss": 0.6676, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.21954777534646244, | |
| "grad_norm": 0.1795649528503418, | |
| "learning_rate": 0.00015637559371574716, | |
| "loss": 0.7105, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.21991247264770242, | |
| "grad_norm": 0.2079457938671112, | |
| "learning_rate": 0.00015630252100840337, | |
| "loss": 1.0025, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.22027716994894236, | |
| "grad_norm": 0.16998709738254547, | |
| "learning_rate": 0.00015622944830105958, | |
| "loss": 0.6432, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.22064186725018234, | |
| "grad_norm": 0.20293568074703217, | |
| "learning_rate": 0.00015615637559371576, | |
| "loss": 0.9977, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.2210065645514223, | |
| "grad_norm": 0.1662643551826477, | |
| "learning_rate": 0.00015608330288637194, | |
| "loss": 0.5779, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.2213712618526623, | |
| "grad_norm": 0.21681144833564758, | |
| "learning_rate": 0.00015601023017902815, | |
| "loss": 0.9133, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.22173595915390226, | |
| "grad_norm": 0.1861148327589035, | |
| "learning_rate": 0.00015593715747168433, | |
| "loss": 0.854, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.22210065645514224, | |
| "grad_norm": 0.15234194695949554, | |
| "learning_rate": 0.00015586408476434054, | |
| "loss": 0.5688, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.2224653537563822, | |
| "grad_norm": 0.18444177508354187, | |
| "learning_rate": 0.00015579101205699672, | |
| "loss": 0.8598, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.22283005105762219, | |
| "grad_norm": 0.1650354266166687, | |
| "learning_rate": 0.0001557179393496529, | |
| "loss": 0.5645, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.22319474835886213, | |
| "grad_norm": 0.1783570498228073, | |
| "learning_rate": 0.0001556448666423091, | |
| "loss": 0.6918, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.2235594456601021, | |
| "grad_norm": 0.201633483171463, | |
| "learning_rate": 0.00015557179393496532, | |
| "loss": 0.9097, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.22392414296134208, | |
| "grad_norm": 0.1808750182390213, | |
| "learning_rate": 0.00015549872122762147, | |
| "loss": 0.5749, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.22428884026258206, | |
| "grad_norm": 0.18188488483428955, | |
| "learning_rate": 0.00015542564852027768, | |
| "loss": 0.7137, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.22465353756382203, | |
| "grad_norm": 0.1791074126958847, | |
| "learning_rate": 0.0001553525758129339, | |
| "loss": 0.8679, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.225018234865062, | |
| "grad_norm": 0.18888451159000397, | |
| "learning_rate": 0.00015527950310559007, | |
| "loss": 0.8296, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.22538293216630198, | |
| "grad_norm": 0.1985502392053604, | |
| "learning_rate": 0.00015520643039824628, | |
| "loss": 0.8388, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.22574762946754193, | |
| "grad_norm": 0.20040510594844818, | |
| "learning_rate": 0.00015513335769090246, | |
| "loss": 0.8381, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.2261123267687819, | |
| "grad_norm": 0.20879292488098145, | |
| "learning_rate": 0.00015506028498355864, | |
| "loss": 0.9998, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.22647702407002188, | |
| "grad_norm": 0.18888017535209656, | |
| "learning_rate": 0.00015498721227621485, | |
| "loss": 0.6915, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.22684172137126185, | |
| "grad_norm": 0.19784311950206757, | |
| "learning_rate": 0.00015491413956887103, | |
| "loss": 1.054, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.22720641867250183, | |
| "grad_norm": 0.16005344688892365, | |
| "learning_rate": 0.0001548410668615272, | |
| "loss": 0.4895, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.2275711159737418, | |
| "grad_norm": 0.20644895732402802, | |
| "learning_rate": 0.00015476799415418342, | |
| "loss": 0.8316, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.22793581327498177, | |
| "grad_norm": 0.18263429403305054, | |
| "learning_rate": 0.00015469492144683963, | |
| "loss": 0.7931, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.22830051057622175, | |
| "grad_norm": 0.18260689079761505, | |
| "learning_rate": 0.0001546218487394958, | |
| "loss": 0.6584, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.2286652078774617, | |
| "grad_norm": 0.20142361521720886, | |
| "learning_rate": 0.00015454877603215202, | |
| "loss": 0.7865, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.22902990517870167, | |
| "grad_norm": 0.163302943110466, | |
| "learning_rate": 0.0001544757033248082, | |
| "loss": 0.6348, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.22939460247994164, | |
| "grad_norm": 0.1729688048362732, | |
| "learning_rate": 0.00015440263061746438, | |
| "loss": 0.6776, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.22975929978118162, | |
| "grad_norm": 0.167247012257576, | |
| "learning_rate": 0.0001543295579101206, | |
| "loss": 0.6274, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.2301239970824216, | |
| "grad_norm": 0.17160290479660034, | |
| "learning_rate": 0.00015425648520277677, | |
| "loss": 0.6194, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.23048869438366157, | |
| "grad_norm": 0.17698507010936737, | |
| "learning_rate": 0.00015418341249543295, | |
| "loss": 0.7014, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.23085339168490154, | |
| "grad_norm": 0.1988571584224701, | |
| "learning_rate": 0.00015411033978808916, | |
| "loss": 0.8195, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.2312180889861415, | |
| "grad_norm": 0.18846118450164795, | |
| "learning_rate": 0.00015403726708074534, | |
| "loss": 0.711, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.23158278628738146, | |
| "grad_norm": 0.1780976951122284, | |
| "learning_rate": 0.00015396419437340155, | |
| "loss": 0.6782, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.23194748358862144, | |
| "grad_norm": 0.17137964069843292, | |
| "learning_rate": 0.00015389112166605773, | |
| "loss": 0.6011, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.2323121808898614, | |
| "grad_norm": 0.18975742161273956, | |
| "learning_rate": 0.0001538180489587139, | |
| "loss": 0.6786, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.2326768781911014, | |
| "grad_norm": 0.17563198506832123, | |
| "learning_rate": 0.00015374497625137012, | |
| "loss": 0.6222, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.23304157549234136, | |
| "grad_norm": 0.17805379629135132, | |
| "learning_rate": 0.00015367190354402633, | |
| "loss": 0.6959, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.23340627279358134, | |
| "grad_norm": 0.1799224615097046, | |
| "learning_rate": 0.0001535988308366825, | |
| "loss": 0.6508, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.2337709700948213, | |
| "grad_norm": 0.1822444349527359, | |
| "learning_rate": 0.0001535257581293387, | |
| "loss": 0.6701, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.23413566739606126, | |
| "grad_norm": 0.18402761220932007, | |
| "learning_rate": 0.0001534526854219949, | |
| "loss": 0.7839, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.23450036469730123, | |
| "grad_norm": 0.1818612664937973, | |
| "learning_rate": 0.00015337961271465108, | |
| "loss": 0.7694, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.2348650619985412, | |
| "grad_norm": 0.17730721831321716, | |
| "learning_rate": 0.0001533065400073073, | |
| "loss": 0.6292, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.23522975929978118, | |
| "grad_norm": 0.1790621429681778, | |
| "learning_rate": 0.00015323346729996347, | |
| "loss": 0.729, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.23559445660102116, | |
| "grad_norm": 0.17606894671916962, | |
| "learning_rate": 0.00015316039459261965, | |
| "loss": 0.6977, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.23595915390226113, | |
| "grad_norm": 0.18288281559944153, | |
| "learning_rate": 0.00015308732188527586, | |
| "loss": 0.7042, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.2363238512035011, | |
| "grad_norm": 0.17753863334655762, | |
| "learning_rate": 0.00015301424917793207, | |
| "loss": 0.7813, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.23668854850474105, | |
| "grad_norm": 0.1613297015428543, | |
| "learning_rate": 0.00015294117647058822, | |
| "loss": 0.5043, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.23705324580598103, | |
| "grad_norm": 0.19487784802913666, | |
| "learning_rate": 0.00015286810376324443, | |
| "loss": 0.8338, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.237417943107221, | |
| "grad_norm": 0.17686185240745544, | |
| "learning_rate": 0.00015279503105590064, | |
| "loss": 0.6406, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.23778264040846098, | |
| "grad_norm": 0.18062898516654968, | |
| "learning_rate": 0.00015272195834855682, | |
| "loss": 0.772, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.23814733770970095, | |
| "grad_norm": 0.1765890270471573, | |
| "learning_rate": 0.00015264888564121303, | |
| "loss": 0.6313, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.23851203501094093, | |
| "grad_norm": 0.18338140845298767, | |
| "learning_rate": 0.0001525758129338692, | |
| "loss": 0.6944, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.2388767323121809, | |
| "grad_norm": 0.16114097833633423, | |
| "learning_rate": 0.0001525027402265254, | |
| "loss": 0.4307, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.23924142961342085, | |
| "grad_norm": 0.18049006164073944, | |
| "learning_rate": 0.0001524296675191816, | |
| "loss": 0.6055, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.23960612691466082, | |
| "grad_norm": 0.17693577706813812, | |
| "learning_rate": 0.00015235659481183778, | |
| "loss": 0.6953, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.2399708242159008, | |
| "grad_norm": 0.17377278208732605, | |
| "learning_rate": 0.00015228352210449396, | |
| "loss": 0.616, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.24033552151714077, | |
| "grad_norm": 0.2092607021331787, | |
| "learning_rate": 0.00015221044939715017, | |
| "loss": 0.9593, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.24070021881838075, | |
| "grad_norm": 0.18305638432502747, | |
| "learning_rate": 0.00015213737668980638, | |
| "loss": 0.7737, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.24106491611962072, | |
| "grad_norm": 0.17872866988182068, | |
| "learning_rate": 0.00015206430398246256, | |
| "loss": 0.6324, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.2414296134208607, | |
| "grad_norm": 0.16150373220443726, | |
| "learning_rate": 0.00015199123127511874, | |
| "loss": 0.4876, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.24179431072210067, | |
| "grad_norm": 0.16832824051380157, | |
| "learning_rate": 0.00015191815856777495, | |
| "loss": 0.5724, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.24215900802334062, | |
| "grad_norm": 0.17251615226268768, | |
| "learning_rate": 0.00015184508586043113, | |
| "loss": 0.6452, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.2425237053245806, | |
| "grad_norm": 0.1856648325920105, | |
| "learning_rate": 0.00015177201315308734, | |
| "loss": 0.6863, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.24288840262582057, | |
| "grad_norm": 0.17599360644817352, | |
| "learning_rate": 0.00015169894044574352, | |
| "loss": 0.573, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.24325309992706054, | |
| "grad_norm": 0.1808532327413559, | |
| "learning_rate": 0.0001516258677383997, | |
| "loss": 0.6037, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.24361779722830051, | |
| "grad_norm": 0.18831709027290344, | |
| "learning_rate": 0.0001515527950310559, | |
| "loss": 0.8105, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.2439824945295405, | |
| "grad_norm": 0.1883680522441864, | |
| "learning_rate": 0.0001514797223237121, | |
| "loss": 0.7836, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.24434719183078046, | |
| "grad_norm": 0.18449294567108154, | |
| "learning_rate": 0.0001514066496163683, | |
| "loss": 0.74, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2447118891320204, | |
| "grad_norm": 0.14520494639873505, | |
| "learning_rate": 0.00015133357690902448, | |
| "loss": 0.4257, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.24507658643326038, | |
| "grad_norm": 0.16999614238739014, | |
| "learning_rate": 0.00015126050420168066, | |
| "loss": 0.5913, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.24544128373450036, | |
| "grad_norm": 0.19084987044334412, | |
| "learning_rate": 0.00015118743149433687, | |
| "loss": 0.7187, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.24580598103574033, | |
| "grad_norm": 0.16345396637916565, | |
| "learning_rate": 0.00015111435878699308, | |
| "loss": 0.5708, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.2461706783369803, | |
| "grad_norm": 0.19265908002853394, | |
| "learning_rate": 0.00015104128607964926, | |
| "loss": 0.5983, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.24653537563822028, | |
| "grad_norm": 0.20187486708164215, | |
| "learning_rate": 0.00015096821337230544, | |
| "loss": 0.7784, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.24690007293946026, | |
| "grad_norm": 0.19124649465084076, | |
| "learning_rate": 0.00015089514066496165, | |
| "loss": 0.7033, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.24726477024070023, | |
| "grad_norm": 0.18420779705047607, | |
| "learning_rate": 0.00015082206795761783, | |
| "loss": 0.7342, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.24762946754194018, | |
| "grad_norm": 0.18304283916950226, | |
| "learning_rate": 0.00015074899525027404, | |
| "loss": 0.7119, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.24799416484318015, | |
| "grad_norm": 0.17313408851623535, | |
| "learning_rate": 0.00015067592254293022, | |
| "loss": 0.7298, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.24835886214442013, | |
| "grad_norm": 0.1871861219406128, | |
| "learning_rate": 0.0001506028498355864, | |
| "loss": 0.6186, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.2487235594456601, | |
| "grad_norm": 0.20435944199562073, | |
| "learning_rate": 0.0001505297771282426, | |
| "loss": 0.843, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.24908825674690008, | |
| "grad_norm": 0.16381756961345673, | |
| "learning_rate": 0.00015045670442089882, | |
| "loss": 0.5857, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.24945295404814005, | |
| "grad_norm": 0.18176475167274475, | |
| "learning_rate": 0.00015038363171355497, | |
| "loss": 0.7148, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.24981765134938003, | |
| "grad_norm": 0.1591276228427887, | |
| "learning_rate": 0.00015031055900621118, | |
| "loss": 0.6348, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.25018234865062, | |
| "grad_norm": 0.16380302608013153, | |
| "learning_rate": 0.0001502374862988674, | |
| "loss": 0.5438, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.25054704595185995, | |
| "grad_norm": 0.16611479222774506, | |
| "learning_rate": 0.00015016441359152357, | |
| "loss": 0.509, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.25091174325309995, | |
| "grad_norm": 0.188828706741333, | |
| "learning_rate": 0.00015009134088417978, | |
| "loss": 0.7431, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.2512764405543399, | |
| "grad_norm": 0.16808100044727325, | |
| "learning_rate": 0.00015001826817683596, | |
| "loss": 0.5523, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.25164113785557984, | |
| "grad_norm": 0.20357143878936768, | |
| "learning_rate": 0.00014994519546949214, | |
| "loss": 0.7993, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.25200583515681985, | |
| "grad_norm": 0.18831704556941986, | |
| "learning_rate": 0.00014987212276214835, | |
| "loss": 0.7521, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.2523705324580598, | |
| "grad_norm": 0.17688477039337158, | |
| "learning_rate": 0.00014979905005480453, | |
| "loss": 0.6743, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.2527352297592998, | |
| "grad_norm": 0.1944332718849182, | |
| "learning_rate": 0.00014972597734746071, | |
| "loss": 0.869, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.25309992706053974, | |
| "grad_norm": 0.1805860847234726, | |
| "learning_rate": 0.00014965290464011692, | |
| "loss": 0.7884, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.25346462436177974, | |
| "grad_norm": 0.149339959025383, | |
| "learning_rate": 0.00014957983193277313, | |
| "loss": 0.4521, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.2538293216630197, | |
| "grad_norm": 0.16970299184322357, | |
| "learning_rate": 0.0001495067592254293, | |
| "loss": 0.6262, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.25419401896425964, | |
| "grad_norm": 0.17406289279460907, | |
| "learning_rate": 0.0001494336865180855, | |
| "loss": 0.7258, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.25455871626549964, | |
| "grad_norm": 0.18558435142040253, | |
| "learning_rate": 0.0001493606138107417, | |
| "loss": 0.6835, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.2549234135667396, | |
| "grad_norm": 0.16771887242794037, | |
| "learning_rate": 0.00014928754110339788, | |
| "loss": 0.506, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.2552881108679796, | |
| "grad_norm": 0.2056199610233307, | |
| "learning_rate": 0.0001492144683960541, | |
| "loss": 0.8879, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.25565280816921954, | |
| "grad_norm": 0.16422101855278015, | |
| "learning_rate": 0.00014914139568871027, | |
| "loss": 0.4785, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.25601750547045954, | |
| "grad_norm": 0.17392300069332123, | |
| "learning_rate": 0.00014906832298136645, | |
| "loss": 0.667, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.2563822027716995, | |
| "grad_norm": 0.18036240339279175, | |
| "learning_rate": 0.00014899525027402266, | |
| "loss": 0.6646, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.2567469000729395, | |
| "grad_norm": 0.19860827922821045, | |
| "learning_rate": 0.00014892217756667884, | |
| "loss": 0.8341, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.25711159737417943, | |
| "grad_norm": 0.15973514318466187, | |
| "learning_rate": 0.00014884910485933505, | |
| "loss": 0.5357, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.2574762946754194, | |
| "grad_norm": 0.17433969676494598, | |
| "learning_rate": 0.00014877603215199123, | |
| "loss": 0.6615, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.2578409919766594, | |
| "grad_norm": 0.17215828597545624, | |
| "learning_rate": 0.00014870295944464742, | |
| "loss": 0.547, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.25820568927789933, | |
| "grad_norm": 0.19459031522274017, | |
| "learning_rate": 0.00014862988673730362, | |
| "loss": 0.8744, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.25857038657913933, | |
| "grad_norm": 0.18972012400627136, | |
| "learning_rate": 0.00014855681402995983, | |
| "loss": 0.7687, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.2589350838803793, | |
| "grad_norm": 0.17814265191555023, | |
| "learning_rate": 0.00014848374132261599, | |
| "loss": 0.627, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.2592997811816193, | |
| "grad_norm": 0.19156868755817413, | |
| "learning_rate": 0.0001484106686152722, | |
| "loss": 0.882, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.25966447848285923, | |
| "grad_norm": 0.17542894184589386, | |
| "learning_rate": 0.0001483375959079284, | |
| "loss": 0.7863, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.2600291757840992, | |
| "grad_norm": 0.15451830625534058, | |
| "learning_rate": 0.00014826452320058458, | |
| "loss": 0.5987, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.2603938730853392, | |
| "grad_norm": 0.17926959693431854, | |
| "learning_rate": 0.0001481914504932408, | |
| "loss": 0.6238, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.2607585703865791, | |
| "grad_norm": 0.15617339313030243, | |
| "learning_rate": 0.00014811837778589697, | |
| "loss": 0.611, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.2611232676878191, | |
| "grad_norm": 0.18667645752429962, | |
| "learning_rate": 0.00014804530507855316, | |
| "loss": 0.657, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.2614879649890591, | |
| "grad_norm": 0.1547141671180725, | |
| "learning_rate": 0.00014797223237120936, | |
| "loss": 0.4469, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.2618526622902991, | |
| "grad_norm": 0.18968114256858826, | |
| "learning_rate": 0.00014789915966386557, | |
| "loss": 0.8392, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.262217359591539, | |
| "grad_norm": 0.1685071587562561, | |
| "learning_rate": 0.00014782608695652173, | |
| "loss": 0.5476, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.26258205689277897, | |
| "grad_norm": 0.18805775046348572, | |
| "learning_rate": 0.00014775301424917793, | |
| "loss": 0.7899, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.262946754194019, | |
| "grad_norm": 0.1985326111316681, | |
| "learning_rate": 0.00014767994154183414, | |
| "loss": 0.8315, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.2633114514952589, | |
| "grad_norm": 0.18817968666553497, | |
| "learning_rate": 0.00014760686883449032, | |
| "loss": 0.8077, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.2636761487964989, | |
| "grad_norm": 0.1707736700773239, | |
| "learning_rate": 0.00014753379612714653, | |
| "loss": 0.5055, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.26404084609773887, | |
| "grad_norm": 0.17432935535907745, | |
| "learning_rate": 0.00014746072341980271, | |
| "loss": 0.6428, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.26440554339897887, | |
| "grad_norm": 0.19772885739803314, | |
| "learning_rate": 0.0001473876507124589, | |
| "loss": 0.8951, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.2647702407002188, | |
| "grad_norm": 0.18301154673099518, | |
| "learning_rate": 0.0001473145780051151, | |
| "loss": 0.8608, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.26513493800145876, | |
| "grad_norm": 0.17923039197921753, | |
| "learning_rate": 0.00014724150529777129, | |
| "loss": 0.674, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.26549963530269877, | |
| "grad_norm": 0.1921747922897339, | |
| "learning_rate": 0.00014716843259042747, | |
| "loss": 0.7752, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.2658643326039387, | |
| "grad_norm": 0.17289309203624725, | |
| "learning_rate": 0.00014709535988308367, | |
| "loss": 0.7519, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.2662290299051787, | |
| "grad_norm": 0.1981101930141449, | |
| "learning_rate": 0.00014702228717573988, | |
| "loss": 0.8573, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.26659372720641866, | |
| "grad_norm": 0.16859839856624603, | |
| "learning_rate": 0.00014694921446839606, | |
| "loss": 0.6, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.26695842450765866, | |
| "grad_norm": 0.15981674194335938, | |
| "learning_rate": 0.00014687614176105225, | |
| "loss": 0.434, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.2673231218088986, | |
| "grad_norm": 0.15087354183197021, | |
| "learning_rate": 0.00014680306905370845, | |
| "loss": 0.4438, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.26768781911013856, | |
| "grad_norm": 0.21009470522403717, | |
| "learning_rate": 0.00014672999634636464, | |
| "loss": 0.92, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.26805251641137856, | |
| "grad_norm": 0.17488998174667358, | |
| "learning_rate": 0.00014665692363902084, | |
| "loss": 0.6819, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.2684172137126185, | |
| "grad_norm": 0.19854167103767395, | |
| "learning_rate": 0.00014658385093167703, | |
| "loss": 0.832, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.2687819110138585, | |
| "grad_norm": 0.17115044593811035, | |
| "learning_rate": 0.0001465107782243332, | |
| "loss": 0.6293, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.26914660831509846, | |
| "grad_norm": 0.14838315546512604, | |
| "learning_rate": 0.00014643770551698941, | |
| "loss": 0.3817, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.26951130561633846, | |
| "grad_norm": 0.1810576319694519, | |
| "learning_rate": 0.0001463646328096456, | |
| "loss": 0.6395, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.2698760029175784, | |
| "grad_norm": 0.2036665976047516, | |
| "learning_rate": 0.0001462915601023018, | |
| "loss": 0.855, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2702407002188184, | |
| "grad_norm": 0.1664070338010788, | |
| "learning_rate": 0.00014621848739495799, | |
| "loss": 0.6324, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.27060539752005836, | |
| "grad_norm": 0.21777962148189545, | |
| "learning_rate": 0.00014614541468761417, | |
| "loss": 0.8487, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.2709700948212983, | |
| "grad_norm": 0.16543632745742798, | |
| "learning_rate": 0.00014607234198027038, | |
| "loss": 0.5855, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.2713347921225383, | |
| "grad_norm": 0.18875911831855774, | |
| "learning_rate": 0.00014599926927292658, | |
| "loss": 0.7444, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.27169948942377825, | |
| "grad_norm": 0.19937555491924286, | |
| "learning_rate": 0.00014592619656558274, | |
| "loss": 0.8593, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.27206418672501825, | |
| "grad_norm": 0.17685411870479584, | |
| "learning_rate": 0.00014585312385823895, | |
| "loss": 0.7563, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.2724288840262582, | |
| "grad_norm": 0.16490668058395386, | |
| "learning_rate": 0.00014578005115089515, | |
| "loss": 0.6355, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.2727935813274982, | |
| "grad_norm": 0.17879438400268555, | |
| "learning_rate": 0.00014570697844355134, | |
| "loss": 0.7345, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.27315827862873815, | |
| "grad_norm": 0.15404187142848969, | |
| "learning_rate": 0.00014563390573620754, | |
| "loss": 0.4345, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.2735229759299781, | |
| "grad_norm": 0.18319928646087646, | |
| "learning_rate": 0.00014556083302886373, | |
| "loss": 0.6891, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2738876732312181, | |
| "grad_norm": 0.173502117395401, | |
| "learning_rate": 0.0001454877603215199, | |
| "loss": 0.6031, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.27425237053245805, | |
| "grad_norm": 0.16543245315551758, | |
| "learning_rate": 0.00014541468761417612, | |
| "loss": 0.5698, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.27461706783369805, | |
| "grad_norm": 0.16900089383125305, | |
| "learning_rate": 0.00014534161490683232, | |
| "loss": 0.7195, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.274981765134938, | |
| "grad_norm": 0.1920769065618515, | |
| "learning_rate": 0.00014526854219948848, | |
| "loss": 0.7794, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.275346462436178, | |
| "grad_norm": 0.15764226019382477, | |
| "learning_rate": 0.0001451954694921447, | |
| "loss": 0.5931, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.27571115973741794, | |
| "grad_norm": 0.16820603609085083, | |
| "learning_rate": 0.0001451223967848009, | |
| "loss": 0.5739, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.2760758570386579, | |
| "grad_norm": 0.15538586676120758, | |
| "learning_rate": 0.00014504932407745708, | |
| "loss": 0.4564, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.2764405543398979, | |
| "grad_norm": 0.1703750640153885, | |
| "learning_rate": 0.00014497625137011328, | |
| "loss": 0.6289, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.27680525164113784, | |
| "grad_norm": 0.17181243002414703, | |
| "learning_rate": 0.00014490317866276947, | |
| "loss": 0.6888, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.27716994894237784, | |
| "grad_norm": 0.17971667647361755, | |
| "learning_rate": 0.00014483010595542565, | |
| "loss": 0.6178, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.2775346462436178, | |
| "grad_norm": 0.16959045827388763, | |
| "learning_rate": 0.00014475703324808186, | |
| "loss": 0.6029, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.2778993435448578, | |
| "grad_norm": 0.18184369802474976, | |
| "learning_rate": 0.00014468396054073804, | |
| "loss": 0.769, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.27826404084609774, | |
| "grad_norm": 0.18276362121105194, | |
| "learning_rate": 0.00014461088783339422, | |
| "loss": 0.6981, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.2786287381473377, | |
| "grad_norm": 0.14473621547222137, | |
| "learning_rate": 0.00014453781512605043, | |
| "loss": 0.4821, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.2789934354485777, | |
| "grad_norm": 0.16732734441757202, | |
| "learning_rate": 0.00014446474241870664, | |
| "loss": 0.5346, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.27935813274981763, | |
| "grad_norm": 0.17172355949878693, | |
| "learning_rate": 0.00014439166971136282, | |
| "loss": 0.6361, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.27972283005105764, | |
| "grad_norm": 0.19567203521728516, | |
| "learning_rate": 0.000144318597004019, | |
| "loss": 0.859, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.2800875273522976, | |
| "grad_norm": 0.1898382008075714, | |
| "learning_rate": 0.0001442455242966752, | |
| "loss": 0.8217, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.2804522246535376, | |
| "grad_norm": 0.1976533979177475, | |
| "learning_rate": 0.0001441724515893314, | |
| "loss": 0.8709, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.28081692195477753, | |
| "grad_norm": 0.15758675336837769, | |
| "learning_rate": 0.0001440993788819876, | |
| "loss": 0.5522, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2811816192560175, | |
| "grad_norm": 0.19185684621334076, | |
| "learning_rate": 0.00014402630617464378, | |
| "loss": 0.7182, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.2815463165572575, | |
| "grad_norm": 0.16831618547439575, | |
| "learning_rate": 0.00014395323346729996, | |
| "loss": 0.5841, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.28191101385849743, | |
| "grad_norm": 0.207386776804924, | |
| "learning_rate": 0.00014388016075995617, | |
| "loss": 0.8525, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.28227571115973743, | |
| "grad_norm": 0.17020228505134583, | |
| "learning_rate": 0.00014380708805261235, | |
| "loss": 0.5534, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.2826404084609774, | |
| "grad_norm": 0.18354672193527222, | |
| "learning_rate": 0.00014373401534526856, | |
| "loss": 0.7006, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.2830051057622174, | |
| "grad_norm": 0.16657932102680206, | |
| "learning_rate": 0.00014366094263792474, | |
| "loss": 0.6063, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.2833698030634573, | |
| "grad_norm": 0.15975604951381683, | |
| "learning_rate": 0.00014358786993058092, | |
| "loss": 0.4616, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.28373450036469733, | |
| "grad_norm": 0.17766812443733215, | |
| "learning_rate": 0.00014351479722323713, | |
| "loss": 0.7521, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.2840991976659373, | |
| "grad_norm": 0.18797791004180908, | |
| "learning_rate": 0.00014344172451589334, | |
| "loss": 0.7563, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.2844638949671772, | |
| "grad_norm": 0.17721839249134064, | |
| "learning_rate": 0.0001433686518085495, | |
| "loss": 0.8557, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2848285922684172, | |
| "grad_norm": 0.18058152496814728, | |
| "learning_rate": 0.0001432955791012057, | |
| "loss": 0.6517, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.28519328956965717, | |
| "grad_norm": 0.1826111078262329, | |
| "learning_rate": 0.0001432225063938619, | |
| "loss": 0.6134, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.2855579868708972, | |
| "grad_norm": 0.1896212249994278, | |
| "learning_rate": 0.0001431494336865181, | |
| "loss": 0.7902, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.2859226841721371, | |
| "grad_norm": 0.1763574331998825, | |
| "learning_rate": 0.0001430763609791743, | |
| "loss": 0.6796, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.2862873814733771, | |
| "grad_norm": 0.16747136414051056, | |
| "learning_rate": 0.00014300328827183048, | |
| "loss": 0.5986, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.28665207877461707, | |
| "grad_norm": 0.18107502162456512, | |
| "learning_rate": 0.00014293021556448666, | |
| "loss": 0.6232, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.287016776075857, | |
| "grad_norm": 0.17364108562469482, | |
| "learning_rate": 0.00014285714285714287, | |
| "loss": 0.6362, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.287381473377097, | |
| "grad_norm": 0.21109223365783691, | |
| "learning_rate": 0.00014278407014979908, | |
| "loss": 0.9846, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.28774617067833697, | |
| "grad_norm": 0.1875358521938324, | |
| "learning_rate": 0.00014271099744245523, | |
| "loss": 0.7414, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.28811086797957697, | |
| "grad_norm": 0.17905642092227936, | |
| "learning_rate": 0.00014263792473511144, | |
| "loss": 0.875, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2884755652808169, | |
| "grad_norm": 0.16830816864967346, | |
| "learning_rate": 0.00014256485202776765, | |
| "loss": 0.6712, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.2888402625820569, | |
| "grad_norm": 0.1730622798204422, | |
| "learning_rate": 0.00014249177932042383, | |
| "loss": 0.5806, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.28920495988329686, | |
| "grad_norm": 0.15828505158424377, | |
| "learning_rate": 0.00014241870661308, | |
| "loss": 0.4814, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.2895696571845368, | |
| "grad_norm": 0.16246497631072998, | |
| "learning_rate": 0.00014234563390573622, | |
| "loss": 0.4964, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.2899343544857768, | |
| "grad_norm": 0.1828710287809372, | |
| "learning_rate": 0.0001422725611983924, | |
| "loss": 0.7139, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.29029905178701676, | |
| "grad_norm": 0.15251024067401886, | |
| "learning_rate": 0.0001421994884910486, | |
| "loss": 0.4463, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.29066374908825676, | |
| "grad_norm": 0.17780308425426483, | |
| "learning_rate": 0.0001421264157837048, | |
| "loss": 0.6125, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.2910284463894967, | |
| "grad_norm": 0.19187119603157043, | |
| "learning_rate": 0.00014205334307636097, | |
| "loss": 0.814, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.2913931436907367, | |
| "grad_norm": 0.20423109829425812, | |
| "learning_rate": 0.00014198027036901718, | |
| "loss": 0.8008, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.29175784099197666, | |
| "grad_norm": 0.17211808264255524, | |
| "learning_rate": 0.0001419071976616734, | |
| "loss": 0.5667, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2921225382932166, | |
| "grad_norm": 0.20684252679347992, | |
| "learning_rate": 0.00014183412495432957, | |
| "loss": 0.7646, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.2924872355944566, | |
| "grad_norm": 0.15838950872421265, | |
| "learning_rate": 0.00014176105224698575, | |
| "loss": 0.5542, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.29285193289569655, | |
| "grad_norm": 0.19353005290031433, | |
| "learning_rate": 0.00014168797953964196, | |
| "loss": 0.7211, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.29321663019693656, | |
| "grad_norm": 0.1527222841978073, | |
| "learning_rate": 0.00014161490683229814, | |
| "loss": 0.5183, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.2935813274981765, | |
| "grad_norm": 0.15716342628002167, | |
| "learning_rate": 0.00014154183412495435, | |
| "loss": 0.5197, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.2939460247994165, | |
| "grad_norm": 0.2023271769285202, | |
| "learning_rate": 0.00014146876141761053, | |
| "loss": 0.8512, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.29431072210065645, | |
| "grad_norm": 0.17139078676700592, | |
| "learning_rate": 0.0001413956887102667, | |
| "loss": 0.5161, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.2946754194018964, | |
| "grad_norm": 0.15848784148693085, | |
| "learning_rate": 0.00014132261600292292, | |
| "loss": 0.6017, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.2950401167031364, | |
| "grad_norm": 0.1857565939426422, | |
| "learning_rate": 0.0001412495432955791, | |
| "loss": 0.7406, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.29540481400437635, | |
| "grad_norm": 0.17538581788539886, | |
| "learning_rate": 0.0001411764705882353, | |
| "loss": 0.615, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.29576951130561635, | |
| "grad_norm": 0.18965482711791992, | |
| "learning_rate": 0.0001411033978808915, | |
| "loss": 0.7869, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.2961342086068563, | |
| "grad_norm": 0.1757965087890625, | |
| "learning_rate": 0.00014103032517354767, | |
| "loss": 0.7217, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.2964989059080963, | |
| "grad_norm": 0.20843826234340668, | |
| "learning_rate": 0.00014095725246620388, | |
| "loss": 0.9802, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.29686360320933625, | |
| "grad_norm": 0.17155392467975616, | |
| "learning_rate": 0.0001408841797588601, | |
| "loss": 0.67, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.29722830051057625, | |
| "grad_norm": 0.1699131578207016, | |
| "learning_rate": 0.00014081110705151624, | |
| "loss": 0.5432, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.2975929978118162, | |
| "grad_norm": 0.2160065770149231, | |
| "learning_rate": 0.00014073803434417245, | |
| "loss": 0.9022, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.29795769511305614, | |
| "grad_norm": 0.17137810587882996, | |
| "learning_rate": 0.00014066496163682866, | |
| "loss": 0.5853, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.29832239241429614, | |
| "grad_norm": 0.18035869300365448, | |
| "learning_rate": 0.00014059188892948484, | |
| "loss": 0.6308, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.2986870897155361, | |
| "grad_norm": 0.15042151510715485, | |
| "learning_rate": 0.00014051881622214105, | |
| "loss": 0.4335, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.2990517870167761, | |
| "grad_norm": 0.19387514889240265, | |
| "learning_rate": 0.00014044574351479723, | |
| "loss": 0.8104, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.29941648431801604, | |
| "grad_norm": 0.1495147943496704, | |
| "learning_rate": 0.0001403726708074534, | |
| "loss": 0.4551, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.29978118161925604, | |
| "grad_norm": 0.1305743157863617, | |
| "learning_rate": 0.00014029959810010962, | |
| "loss": 0.3016, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.300145878920496, | |
| "grad_norm": 0.18238870799541473, | |
| "learning_rate": 0.00014022652539276583, | |
| "loss": 0.6105, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.30051057622173594, | |
| "grad_norm": 0.1913590431213379, | |
| "learning_rate": 0.00014015345268542198, | |
| "loss": 0.8108, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.30087527352297594, | |
| "grad_norm": 0.15131442248821259, | |
| "learning_rate": 0.0001400803799780782, | |
| "loss": 0.5041, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.3012399708242159, | |
| "grad_norm": 0.15665921568870544, | |
| "learning_rate": 0.0001400073072707344, | |
| "loss": 0.5691, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.3016046681254559, | |
| "grad_norm": 0.1675909459590912, | |
| "learning_rate": 0.00013993423456339058, | |
| "loss": 0.6212, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.30196936542669583, | |
| "grad_norm": 0.2064395546913147, | |
| "learning_rate": 0.00013986116185604676, | |
| "loss": 0.8267, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.30233406272793584, | |
| "grad_norm": 0.18026086688041687, | |
| "learning_rate": 0.00013978808914870297, | |
| "loss": 0.7102, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.3026987600291758, | |
| "grad_norm": 0.18619216978549957, | |
| "learning_rate": 0.00013971501644135915, | |
| "loss": 0.5986, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.30306345733041573, | |
| "grad_norm": 0.20318515598773956, | |
| "learning_rate": 0.00013964194373401536, | |
| "loss": 0.9558, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.30342815463165573, | |
| "grad_norm": 0.1473841667175293, | |
| "learning_rate": 0.00013956887102667154, | |
| "loss": 0.3906, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.3037928519328957, | |
| "grad_norm": 0.1872483193874359, | |
| "learning_rate": 0.00013949579831932772, | |
| "loss": 0.7164, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.3041575492341357, | |
| "grad_norm": 0.1955832690000534, | |
| "learning_rate": 0.00013942272561198393, | |
| "loss": 0.7684, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.30452224653537563, | |
| "grad_norm": 0.17416808009147644, | |
| "learning_rate": 0.00013934965290464014, | |
| "loss": 0.6543, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.30488694383661563, | |
| "grad_norm": 0.18365338444709778, | |
| "learning_rate": 0.00013927658019729632, | |
| "loss": 0.6797, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.3052516411378556, | |
| "grad_norm": 0.159471333026886, | |
| "learning_rate": 0.0001392035074899525, | |
| "loss": 0.5944, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.3056163384390955, | |
| "grad_norm": 0.1895028054714203, | |
| "learning_rate": 0.0001391304347826087, | |
| "loss": 0.7348, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.3059810357403355, | |
| "grad_norm": 0.19214889407157898, | |
| "learning_rate": 0.0001390573620752649, | |
| "loss": 0.8888, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.3063457330415755, | |
| "grad_norm": 0.18355461955070496, | |
| "learning_rate": 0.0001389842893679211, | |
| "loss": 0.7066, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3067104303428155, | |
| "grad_norm": 0.20794498920440674, | |
| "learning_rate": 0.00013891121666057728, | |
| "loss": 0.7818, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.3070751276440554, | |
| "grad_norm": 0.20993681252002716, | |
| "learning_rate": 0.00013883814395323346, | |
| "loss": 0.8961, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.3074398249452954, | |
| "grad_norm": 0.17933285236358643, | |
| "learning_rate": 0.00013876507124588967, | |
| "loss": 0.7162, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.3078045222465354, | |
| "grad_norm": 0.1597253382205963, | |
| "learning_rate": 0.00013869199853854585, | |
| "loss": 0.4908, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.3081692195477753, | |
| "grad_norm": 0.17529501020908356, | |
| "learning_rate": 0.00013861892583120206, | |
| "loss": 0.5254, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.3085339168490153, | |
| "grad_norm": 0.19237765669822693, | |
| "learning_rate": 0.00013854585312385824, | |
| "loss": 0.7377, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.30889861415025527, | |
| "grad_norm": 0.18043018877506256, | |
| "learning_rate": 0.00013847278041651442, | |
| "loss": 0.6784, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.30926331145149527, | |
| "grad_norm": 0.1529788225889206, | |
| "learning_rate": 0.00013839970770917063, | |
| "loss": 0.5048, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.3096280087527352, | |
| "grad_norm": 0.18892444670200348, | |
| "learning_rate": 0.00013832663500182684, | |
| "loss": 0.7093, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.3099927060539752, | |
| "grad_norm": 0.17523162066936493, | |
| "learning_rate": 0.000138253562294483, | |
| "loss": 0.6947, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.31035740335521517, | |
| "grad_norm": 0.17124755680561066, | |
| "learning_rate": 0.0001381804895871392, | |
| "loss": 0.6009, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.31072210065645517, | |
| "grad_norm": 0.19225868582725525, | |
| "learning_rate": 0.0001381074168797954, | |
| "loss": 0.8058, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.3110867979576951, | |
| "grad_norm": 0.19115358591079712, | |
| "learning_rate": 0.0001380343441724516, | |
| "loss": 0.6689, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.31145149525893506, | |
| "grad_norm": 0.15115682780742645, | |
| "learning_rate": 0.0001379612714651078, | |
| "loss": 0.3348, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.31181619256017507, | |
| "grad_norm": 0.18653741478919983, | |
| "learning_rate": 0.00013788819875776398, | |
| "loss": 0.872, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.312180889861415, | |
| "grad_norm": 0.18661542236804962, | |
| "learning_rate": 0.00013781512605042016, | |
| "loss": 0.9351, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.312545587162655, | |
| "grad_norm": 0.1910124570131302, | |
| "learning_rate": 0.00013774205334307637, | |
| "loss": 0.8575, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.31291028446389496, | |
| "grad_norm": 0.18687231838703156, | |
| "learning_rate": 0.00013766898063573258, | |
| "loss": 0.7134, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.31327498176513496, | |
| "grad_norm": 0.17739079892635345, | |
| "learning_rate": 0.00013759590792838873, | |
| "loss": 0.6473, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.3136396790663749, | |
| "grad_norm": 0.16609562933444977, | |
| "learning_rate": 0.00013752283522104494, | |
| "loss": 0.511, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.31400437636761486, | |
| "grad_norm": 0.17272962629795074, | |
| "learning_rate": 0.00013744976251370115, | |
| "loss": 0.5939, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.31436907366885486, | |
| "grad_norm": 0.196741983294487, | |
| "learning_rate": 0.00013737668980635733, | |
| "loss": 0.6241, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.3147337709700948, | |
| "grad_norm": 0.16731449961662292, | |
| "learning_rate": 0.00013730361709901351, | |
| "loss": 0.6294, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.3150984682713348, | |
| "grad_norm": 0.17899174988269806, | |
| "learning_rate": 0.00013723054439166972, | |
| "loss": 0.724, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.31546316557257476, | |
| "grad_norm": 0.18915481865406036, | |
| "learning_rate": 0.0001371574716843259, | |
| "loss": 0.6508, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.31582786287381476, | |
| "grad_norm": 0.17585769295692444, | |
| "learning_rate": 0.0001370843989769821, | |
| "loss": 0.736, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.3161925601750547, | |
| "grad_norm": 0.16982930898666382, | |
| "learning_rate": 0.0001370113262696383, | |
| "loss": 0.5676, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.31655725747629465, | |
| "grad_norm": 0.17551635205745697, | |
| "learning_rate": 0.00013693825356229447, | |
| "loss": 0.621, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.31692195477753465, | |
| "grad_norm": 0.19762268662452698, | |
| "learning_rate": 0.00013686518085495068, | |
| "loss": 0.872, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.3172866520787746, | |
| "grad_norm": 0.16374340653419495, | |
| "learning_rate": 0.0001367921081476069, | |
| "loss": 0.4612, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.3176513493800146, | |
| "grad_norm": 0.20281797647476196, | |
| "learning_rate": 0.00013671903544026307, | |
| "loss": 0.8791, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.31801604668125455, | |
| "grad_norm": 0.1752830445766449, | |
| "learning_rate": 0.00013664596273291925, | |
| "loss": 0.7158, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.31838074398249455, | |
| "grad_norm": 0.1833350956439972, | |
| "learning_rate": 0.00013657289002557546, | |
| "loss": 0.7307, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.3187454412837345, | |
| "grad_norm": 0.158727765083313, | |
| "learning_rate": 0.00013649981731823164, | |
| "loss": 0.4426, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.31911013858497445, | |
| "grad_norm": 0.1881689429283142, | |
| "learning_rate": 0.00013642674461088785, | |
| "loss": 0.6806, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.31947483588621445, | |
| "grad_norm": 0.18638849258422852, | |
| "learning_rate": 0.00013635367190354403, | |
| "loss": 0.5562, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.3198395331874544, | |
| "grad_norm": 0.17578531801700592, | |
| "learning_rate": 0.00013628059919620022, | |
| "loss": 0.6174, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.3202042304886944, | |
| "grad_norm": 0.17400258779525757, | |
| "learning_rate": 0.00013620752648885642, | |
| "loss": 0.6818, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.32056892778993434, | |
| "grad_norm": 0.17320291697978973, | |
| "learning_rate": 0.0001361344537815126, | |
| "loss": 0.59, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.32093362509117435, | |
| "grad_norm": 0.1907418966293335, | |
| "learning_rate": 0.0001360613810741688, | |
| "loss": 0.8411, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3212983223924143, | |
| "grad_norm": 0.1831212341785431, | |
| "learning_rate": 0.000135988308366825, | |
| "loss": 0.7538, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.32166301969365424, | |
| "grad_norm": 0.16207090020179749, | |
| "learning_rate": 0.00013591523565948118, | |
| "loss": 0.5343, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.32202771699489424, | |
| "grad_norm": 0.16630573570728302, | |
| "learning_rate": 0.00013584216295213738, | |
| "loss": 0.5422, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.3223924142961342, | |
| "grad_norm": 0.16689611971378326, | |
| "learning_rate": 0.0001357690902447936, | |
| "loss": 0.682, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.3227571115973742, | |
| "grad_norm": 0.1957065463066101, | |
| "learning_rate": 0.00013569601753744975, | |
| "loss": 0.8429, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.32312180889861414, | |
| "grad_norm": 0.16777260601520538, | |
| "learning_rate": 0.00013562294483010596, | |
| "loss": 0.5122, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.32348650619985414, | |
| "grad_norm": 0.18913322687149048, | |
| "learning_rate": 0.00013554987212276216, | |
| "loss": 0.8262, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.3238512035010941, | |
| "grad_norm": 0.15186062455177307, | |
| "learning_rate": 0.00013547679941541834, | |
| "loss": 0.4259, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.3242159008023341, | |
| "grad_norm": 0.17760516703128815, | |
| "learning_rate": 0.00013540372670807453, | |
| "loss": 0.6425, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.32458059810357404, | |
| "grad_norm": 0.17440609633922577, | |
| "learning_rate": 0.00013533065400073073, | |
| "loss": 0.795, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.324945295404814, | |
| "grad_norm": 0.1680755466222763, | |
| "learning_rate": 0.00013525758129338692, | |
| "loss": 0.6218, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.325309992706054, | |
| "grad_norm": 0.18546250462532043, | |
| "learning_rate": 0.00013518450858604312, | |
| "loss": 0.7299, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.32567469000729393, | |
| "grad_norm": 0.2105245590209961, | |
| "learning_rate": 0.00013511143587869933, | |
| "loss": 0.8526, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.32603938730853393, | |
| "grad_norm": 0.18736779689788818, | |
| "learning_rate": 0.0001350383631713555, | |
| "loss": 0.7205, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.3264040846097739, | |
| "grad_norm": 0.17326989769935608, | |
| "learning_rate": 0.0001349652904640117, | |
| "loss": 0.5746, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.3267687819110139, | |
| "grad_norm": 0.21187415719032288, | |
| "learning_rate": 0.0001348922177566679, | |
| "loss": 0.9321, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.32713347921225383, | |
| "grad_norm": 0.18079346418380737, | |
| "learning_rate": 0.00013481914504932409, | |
| "loss": 0.6063, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.3274981765134938, | |
| "grad_norm": 0.2063044011592865, | |
| "learning_rate": 0.00013474607234198027, | |
| "loss": 0.8192, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.3278628738147338, | |
| "grad_norm": 0.1921169012784958, | |
| "learning_rate": 0.00013467299963463647, | |
| "loss": 0.9365, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.3282275711159737, | |
| "grad_norm": 0.19340583682060242, | |
| "learning_rate": 0.00013459992692729266, | |
| "loss": 0.7314, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.32859226841721373, | |
| "grad_norm": 0.19069619476795197, | |
| "learning_rate": 0.00013452685421994886, | |
| "loss": 0.7089, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.3289569657184537, | |
| "grad_norm": 0.1918506920337677, | |
| "learning_rate": 0.00013445378151260507, | |
| "loss": 0.7538, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.3293216630196937, | |
| "grad_norm": 0.18830406665802002, | |
| "learning_rate": 0.00013438070880526123, | |
| "loss": 0.6583, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.3296863603209336, | |
| "grad_norm": 0.18953083455562592, | |
| "learning_rate": 0.00013430763609791744, | |
| "loss": 0.6886, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.33005105762217357, | |
| "grad_norm": 0.1635250300168991, | |
| "learning_rate": 0.00013423456339057364, | |
| "loss": 0.6201, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.3304157549234136, | |
| "grad_norm": 0.19504040479660034, | |
| "learning_rate": 0.00013416149068322983, | |
| "loss": 0.6456, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.3307804522246535, | |
| "grad_norm": 0.1988976150751114, | |
| "learning_rate": 0.000134088417975886, | |
| "loss": 0.7649, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.3311451495258935, | |
| "grad_norm": 0.17592492699623108, | |
| "learning_rate": 0.00013401534526854221, | |
| "loss": 0.6112, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.33150984682713347, | |
| "grad_norm": 0.19126677513122559, | |
| "learning_rate": 0.0001339422725611984, | |
| "loss": 0.7045, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.33187454412837347, | |
| "grad_norm": 0.1643197238445282, | |
| "learning_rate": 0.0001338691998538546, | |
| "loss": 0.4433, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.3322392414296134, | |
| "grad_norm": 0.19282923638820648, | |
| "learning_rate": 0.00013379612714651079, | |
| "loss": 0.7357, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.33260393873085337, | |
| "grad_norm": 0.17284700274467468, | |
| "learning_rate": 0.00013372305443916697, | |
| "loss": 0.8092, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.33296863603209337, | |
| "grad_norm": 0.2016737312078476, | |
| "learning_rate": 0.00013364998173182318, | |
| "loss": 0.8372, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.1924041360616684, | |
| "learning_rate": 0.00013357690902447936, | |
| "loss": 0.7576, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.3336980306345733, | |
| "grad_norm": 0.18545666337013245, | |
| "learning_rate": 0.00013350383631713557, | |
| "loss": 0.7559, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.33406272793581326, | |
| "grad_norm": 0.1433466225862503, | |
| "learning_rate": 0.00013343076360979175, | |
| "loss": 0.4667, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.33442742523705327, | |
| "grad_norm": 0.1683466136455536, | |
| "learning_rate": 0.00013335769090244793, | |
| "loss": 0.5951, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.3347921225382932, | |
| "grad_norm": 0.18137226998806, | |
| "learning_rate": 0.00013328461819510414, | |
| "loss": 0.6964, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.33515681983953316, | |
| "grad_norm": 0.19752834737300873, | |
| "learning_rate": 0.00013321154548776034, | |
| "loss": 0.7344, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.33552151714077316, | |
| "grad_norm": 0.17333728075027466, | |
| "learning_rate": 0.0001331384727804165, | |
| "loss": 0.5658, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.3358862144420131, | |
| "grad_norm": 0.17877991497516632, | |
| "learning_rate": 0.0001330654000730727, | |
| "loss": 0.6542, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.3362509117432531, | |
| "grad_norm": 0.15863974392414093, | |
| "learning_rate": 0.00013299232736572892, | |
| "loss": 0.4601, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.33661560904449306, | |
| "grad_norm": 0.20868968963623047, | |
| "learning_rate": 0.0001329192546583851, | |
| "loss": 0.7883, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.33698030634573306, | |
| "grad_norm": 0.20271888375282288, | |
| "learning_rate": 0.00013284618195104128, | |
| "loss": 0.9605, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.337345003646973, | |
| "grad_norm": 0.19558852910995483, | |
| "learning_rate": 0.0001327731092436975, | |
| "loss": 0.7866, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.337709700948213, | |
| "grad_norm": 0.2165563404560089, | |
| "learning_rate": 0.00013270003653635367, | |
| "loss": 0.9694, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.33807439824945296, | |
| "grad_norm": 0.18585020303726196, | |
| "learning_rate": 0.00013262696382900988, | |
| "loss": 0.7127, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.3384390955506929, | |
| "grad_norm": 0.15885639190673828, | |
| "learning_rate": 0.00013255389112166608, | |
| "loss": 0.4634, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.3388037928519329, | |
| "grad_norm": 0.18902234733104706, | |
| "learning_rate": 0.00013248081841432224, | |
| "loss": 0.7553, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.33916849015317285, | |
| "grad_norm": 0.1625453233718872, | |
| "learning_rate": 0.00013240774570697845, | |
| "loss": 0.6045, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.33953318745441285, | |
| "grad_norm": 0.1839369386434555, | |
| "learning_rate": 0.00013233467299963466, | |
| "loss": 0.6966, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.3398978847556528, | |
| "grad_norm": 0.1871074140071869, | |
| "learning_rate": 0.00013226160029229084, | |
| "loss": 0.7223, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.3402625820568928, | |
| "grad_norm": 0.19105811417102814, | |
| "learning_rate": 0.00013218852758494702, | |
| "loss": 0.8262, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.34062727935813275, | |
| "grad_norm": 0.19458365440368652, | |
| "learning_rate": 0.00013211545487760323, | |
| "loss": 0.6728, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.3409919766593727, | |
| "grad_norm": 0.17245818674564362, | |
| "learning_rate": 0.0001320423821702594, | |
| "loss": 0.6373, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.3413566739606127, | |
| "grad_norm": 0.17466460168361664, | |
| "learning_rate": 0.00013196930946291562, | |
| "loss": 0.6054, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.34172137126185265, | |
| "grad_norm": 0.1496109813451767, | |
| "learning_rate": 0.0001318962367555718, | |
| "loss": 0.5585, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.34208606856309265, | |
| "grad_norm": 0.16135789453983307, | |
| "learning_rate": 0.00013182316404822798, | |
| "loss": 0.4524, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.3424507658643326, | |
| "grad_norm": 0.18663141131401062, | |
| "learning_rate": 0.0001317500913408842, | |
| "loss": 0.6951, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.3428154631655726, | |
| "grad_norm": 0.15193338692188263, | |
| "learning_rate": 0.0001316770186335404, | |
| "loss": 0.5151, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.34318016046681254, | |
| "grad_norm": 0.16860604286193848, | |
| "learning_rate": 0.00013160394592619658, | |
| "loss": 0.5776, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.3435448577680525, | |
| "grad_norm": 0.18972420692443848, | |
| "learning_rate": 0.00013153087321885276, | |
| "loss": 0.7691, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.3439095550692925, | |
| "grad_norm": 0.196933776140213, | |
| "learning_rate": 0.00013145780051150897, | |
| "loss": 0.8165, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.34427425237053244, | |
| "grad_norm": 0.19200679659843445, | |
| "learning_rate": 0.00013138472780416515, | |
| "loss": 0.7874, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.34463894967177244, | |
| "grad_norm": 0.1795893758535385, | |
| "learning_rate": 0.00013131165509682136, | |
| "loss": 0.7507, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.3450036469730124, | |
| "grad_norm": 0.18410655856132507, | |
| "learning_rate": 0.00013123858238947754, | |
| "loss": 0.7354, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.3453683442742524, | |
| "grad_norm": 0.1739976406097412, | |
| "learning_rate": 0.00013116550968213372, | |
| "loss": 0.6932, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.34573304157549234, | |
| "grad_norm": 0.14160172641277313, | |
| "learning_rate": 0.00013109243697478993, | |
| "loss": 0.351, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.3460977388767323, | |
| "grad_norm": 0.14415137469768524, | |
| "learning_rate": 0.0001310193642674461, | |
| "loss": 0.4202, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.3464624361779723, | |
| "grad_norm": 0.2061617225408554, | |
| "learning_rate": 0.00013094629156010232, | |
| "loss": 0.9679, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.34682713347921224, | |
| "grad_norm": 0.20319141447544098, | |
| "learning_rate": 0.0001308732188527585, | |
| "loss": 0.8076, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.34719183078045224, | |
| "grad_norm": 0.17571642994880676, | |
| "learning_rate": 0.00013080014614541468, | |
| "loss": 0.6941, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.3475565280816922, | |
| "grad_norm": 0.177334725856781, | |
| "learning_rate": 0.0001307270734380709, | |
| "loss": 0.7511, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.3479212253829322, | |
| "grad_norm": 0.2112066000699997, | |
| "learning_rate": 0.0001306540007307271, | |
| "loss": 1.0981, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.34828592268417213, | |
| "grad_norm": 0.18469132483005524, | |
| "learning_rate": 0.00013058092802338325, | |
| "loss": 0.8159, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.3486506199854121, | |
| "grad_norm": 0.17193461954593658, | |
| "learning_rate": 0.00013050785531603946, | |
| "loss": 0.6005, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.3490153172866521, | |
| "grad_norm": 0.21006590127944946, | |
| "learning_rate": 0.00013043478260869567, | |
| "loss": 1.0101, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.34938001458789203, | |
| "grad_norm": 0.1526053547859192, | |
| "learning_rate": 0.00013036170990135185, | |
| "loss": 0.5219, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.34974471188913203, | |
| "grad_norm": 0.18546460568904877, | |
| "learning_rate": 0.00013028863719400803, | |
| "loss": 0.7272, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.350109409190372, | |
| "grad_norm": 0.1910969465970993, | |
| "learning_rate": 0.00013021556448666424, | |
| "loss": 0.8131, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.350474106491612, | |
| "grad_norm": 0.17536579072475433, | |
| "learning_rate": 0.00013014249177932042, | |
| "loss": 0.62, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.3508388037928519, | |
| "grad_norm": 0.18440979719161987, | |
| "learning_rate": 0.00013006941907197663, | |
| "loss": 0.7427, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.35120350109409193, | |
| "grad_norm": 0.1736113727092743, | |
| "learning_rate": 0.00012999634636463284, | |
| "loss": 0.6271, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.3515681983953319, | |
| "grad_norm": 0.1437050998210907, | |
| "learning_rate": 0.000129923273657289, | |
| "loss": 0.448, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.3519328956965718, | |
| "grad_norm": 0.17444917559623718, | |
| "learning_rate": 0.0001298502009499452, | |
| "loss": 0.5675, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.3522975929978118, | |
| "grad_norm": 0.2057693749666214, | |
| "learning_rate": 0.0001297771282426014, | |
| "loss": 0.9973, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.3526622902990518, | |
| "grad_norm": 0.1640542894601822, | |
| "learning_rate": 0.0001297040555352576, | |
| "loss": 0.5839, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.3530269876002918, | |
| "grad_norm": 0.1888854056596756, | |
| "learning_rate": 0.00012963098282791377, | |
| "loss": 0.7098, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.3533916849015317, | |
| "grad_norm": 0.1694556325674057, | |
| "learning_rate": 0.00012955791012056998, | |
| "loss": 0.5281, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.3537563822027717, | |
| "grad_norm": 0.17687252163887024, | |
| "learning_rate": 0.00012948483741322616, | |
| "loss": 0.6114, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.35412107950401167, | |
| "grad_norm": 0.1951674520969391, | |
| "learning_rate": 0.00012941176470588237, | |
| "loss": 0.8128, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.3544857768052516, | |
| "grad_norm": 0.20023071765899658, | |
| "learning_rate": 0.00012933869199853855, | |
| "loss": 0.8015, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.3548504741064916, | |
| "grad_norm": 0.18741564452648163, | |
| "learning_rate": 0.00012926561929119473, | |
| "loss": 0.7011, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.35521517140773157, | |
| "grad_norm": 0.13944192230701447, | |
| "learning_rate": 0.00012919254658385094, | |
| "loss": 0.365, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.35557986870897157, | |
| "grad_norm": 0.20607557892799377, | |
| "learning_rate": 0.00012911947387650715, | |
| "loss": 0.89, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.3559445660102115, | |
| "grad_norm": 0.2182752937078476, | |
| "learning_rate": 0.00012904640116916333, | |
| "loss": 0.9494, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.3563092633114515, | |
| "grad_norm": 0.18262708187103271, | |
| "learning_rate": 0.0001289733284618195, | |
| "loss": 0.6899, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.35667396061269147, | |
| "grad_norm": 0.18693357706069946, | |
| "learning_rate": 0.00012890025575447572, | |
| "loss": 0.7838, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.3570386579139314, | |
| "grad_norm": 0.19558003544807434, | |
| "learning_rate": 0.0001288271830471319, | |
| "loss": 0.6937, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.3574033552151714, | |
| "grad_norm": 0.1773812621831894, | |
| "learning_rate": 0.0001287541103397881, | |
| "loss": 0.6103, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.35776805251641136, | |
| "grad_norm": 0.19865770637989044, | |
| "learning_rate": 0.0001286810376324443, | |
| "loss": 0.884, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.35813274981765136, | |
| "grad_norm": 0.17361897230148315, | |
| "learning_rate": 0.00012860796492510047, | |
| "loss": 0.6312, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.3584974471188913, | |
| "grad_norm": 0.17946158349514008, | |
| "learning_rate": 0.00012853489221775668, | |
| "loss": 0.7898, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.3588621444201313, | |
| "grad_norm": 0.1990206092596054, | |
| "learning_rate": 0.00012846181951041286, | |
| "loss": 0.821, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.35922684172137126, | |
| "grad_norm": 0.16487091779708862, | |
| "learning_rate": 0.00012838874680306904, | |
| "loss": 0.6642, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.3595915390226112, | |
| "grad_norm": 0.17961853742599487, | |
| "learning_rate": 0.00012831567409572525, | |
| "loss": 0.8118, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.3599562363238512, | |
| "grad_norm": 0.16912826895713806, | |
| "learning_rate": 0.00012824260138838143, | |
| "loss": 0.6378, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.36032093362509116, | |
| "grad_norm": 0.19518031179904938, | |
| "learning_rate": 0.00012816952868103764, | |
| "loss": 0.9833, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.36068563092633116, | |
| "grad_norm": 0.18844519555568695, | |
| "learning_rate": 0.00012809645597369385, | |
| "loss": 0.7191, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.3610503282275711, | |
| "grad_norm": 0.18332988023757935, | |
| "learning_rate": 0.00012802338326635, | |
| "loss": 0.7232, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.3614150255288111, | |
| "grad_norm": 0.18674488365650177, | |
| "learning_rate": 0.0001279503105590062, | |
| "loss": 0.7617, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.36177972283005105, | |
| "grad_norm": 0.15862171351909637, | |
| "learning_rate": 0.00012787723785166242, | |
| "loss": 0.5411, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.36214442013129106, | |
| "grad_norm": 0.15820543467998505, | |
| "learning_rate": 0.0001278041651443186, | |
| "loss": 0.5198, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.362509117432531, | |
| "grad_norm": 0.19230028986930847, | |
| "learning_rate": 0.00012773109243697478, | |
| "loss": 0.7863, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.36287381473377095, | |
| "grad_norm": 0.18754363059997559, | |
| "learning_rate": 0.000127658019729631, | |
| "loss": 0.7216, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.36323851203501095, | |
| "grad_norm": 0.19463679194450378, | |
| "learning_rate": 0.00012758494702228717, | |
| "loss": 0.8963, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.3636032093362509, | |
| "grad_norm": 0.17682290077209473, | |
| "learning_rate": 0.00012751187431494338, | |
| "loss": 0.7009, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.3639679066374909, | |
| "grad_norm": 0.20536212623119354, | |
| "learning_rate": 0.0001274388016075996, | |
| "loss": 0.9973, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.36433260393873085, | |
| "grad_norm": 0.2060231864452362, | |
| "learning_rate": 0.00012736572890025574, | |
| "loss": 0.8184, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.36469730123997085, | |
| "grad_norm": 0.19924308359622955, | |
| "learning_rate": 0.00012729265619291195, | |
| "loss": 0.7663, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 2742, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.490016210832998e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |