| { |
| "epoch": 3.717070851165002, |
| "global_step": 3910, |
| "max_steps": 4208, |
| "logging_steps": 5, |
| "eval_steps": 300, |
| "save_steps": 300, |
| "train_batch_size": 16, |
| "num_train_epochs": 4, |
| "num_input_tokens_seen": 0, |
| "total_flos": 9.067553930774016e+17, |
| "log_history": [ |
| { |
| "loss": 2.7085, |
| "grad_norm": 5.54561185836792, |
| "learning_rate": 3.791469194312797e-06, |
| "epoch": 0.004755111745126011, |
| "step": 5 |
| }, |
| { |
| "loss": 2.5802, |
| "grad_norm": 4.10910701751709, |
| "learning_rate": 8.530805687203793e-06, |
| "epoch": 0.009510223490252021, |
| "step": 10 |
| }, |
| { |
| "loss": 2.5039, |
| "grad_norm": 2.7873151302337646, |
| "learning_rate": 1.3270142180094788e-05, |
| "epoch": 0.014265335235378032, |
| "step": 15 |
| }, |
| { |
| "loss": 2.375, |
| "grad_norm": 2.1309990882873535, |
| "learning_rate": 1.8009478672985784e-05, |
| "epoch": 0.019020446980504042, |
| "step": 20 |
| }, |
| { |
| "loss": 2.2621, |
| "grad_norm": 1.3047362565994263, |
| "learning_rate": 2.274881516587678e-05, |
| "epoch": 0.02377555872563005, |
| "step": 25 |
| }, |
| { |
| "loss": 2.159, |
| "grad_norm": 1.2718138694763184, |
| "learning_rate": 2.7488151658767774e-05, |
| "epoch": 0.028530670470756064, |
| "step": 30 |
| }, |
| { |
| "loss": 2.0731, |
| "grad_norm": 1.071070909500122, |
| "learning_rate": 3.222748815165877e-05, |
| "epoch": 0.033285782215882076, |
| "step": 35 |
| }, |
| { |
| "loss": 1.9968, |
| "grad_norm": 0.9270461201667786, |
| "learning_rate": 3.696682464454976e-05, |
| "epoch": 0.038040893961008085, |
| "step": 40 |
| }, |
| { |
| "loss": 1.9043, |
| "grad_norm": 0.9485486745834351, |
| "learning_rate": 4.1706161137440756e-05, |
| "epoch": 0.042796005706134094, |
| "step": 45 |
| }, |
| { |
| "loss": 1.8417, |
| "grad_norm": 0.9114959239959717, |
| "learning_rate": 4.644549763033176e-05, |
| "epoch": 0.0475511174512601, |
| "step": 50 |
| }, |
| { |
| "loss": 1.7801, |
| "grad_norm": 0.757665753364563, |
| "learning_rate": 5.1184834123222746e-05, |
| "epoch": 0.05230622919638612, |
| "step": 55 |
| }, |
| { |
| "loss": 1.7447, |
| "grad_norm": 0.8645685911178589, |
| "learning_rate": 5.592417061611375e-05, |
| "epoch": 0.05706134094151213, |
| "step": 60 |
| }, |
| { |
| "loss": 1.7381, |
| "grad_norm": 0.8405068516731262, |
| "learning_rate": 6.066350710900475e-05, |
| "epoch": 0.061816452686638136, |
| "step": 65 |
| }, |
| { |
| "loss": 1.7241, |
| "grad_norm": 0.6960498690605164, |
| "learning_rate": 6.540284360189573e-05, |
| "epoch": 0.06657156443176415, |
| "step": 70 |
| }, |
| { |
| "loss": 1.6682, |
| "grad_norm": 0.6851388812065125, |
| "learning_rate": 7.014218009478673e-05, |
| "epoch": 0.07132667617689016, |
| "step": 75 |
| }, |
| { |
| "loss": 1.6389, |
| "grad_norm": 0.7017823457717896, |
| "learning_rate": 7.488151658767772e-05, |
| "epoch": 0.07608178792201617, |
| "step": 80 |
| }, |
| { |
| "loss": 1.5965, |
| "grad_norm": 0.8321701288223267, |
| "learning_rate": 7.962085308056872e-05, |
| "epoch": 0.08083689966714218, |
| "step": 85 |
| }, |
| { |
| "loss": 1.6071, |
| "grad_norm": 0.7419420480728149, |
| "learning_rate": 8.436018957345973e-05, |
| "epoch": 0.08559201141226819, |
| "step": 90 |
| }, |
| { |
| "loss": 1.5444, |
| "grad_norm": 0.7766271829605103, |
| "learning_rate": 8.909952606635071e-05, |
| "epoch": 0.0903471231573942, |
| "step": 95 |
| }, |
| { |
| "loss": 1.5723, |
| "grad_norm": 0.6929298639297485, |
| "learning_rate": 9.383886255924172e-05, |
| "epoch": 0.0951022349025202, |
| "step": 100 |
| }, |
| { |
| "loss": 1.5401, |
| "grad_norm": 0.7620290517807007, |
| "learning_rate": 9.85781990521327e-05, |
| "epoch": 0.09985734664764621, |
| "step": 105 |
| }, |
| { |
| "loss": 1.5336, |
| "grad_norm": 0.7455782294273376, |
| "learning_rate": 0.0001033175355450237, |
| "epoch": 0.10461245839277224, |
| "step": 110 |
| }, |
| { |
| "loss": 1.5158, |
| "grad_norm": 0.7211477160453796, |
| "learning_rate": 0.00010805687203791471, |
| "epoch": 0.10936757013789825, |
| "step": 115 |
| }, |
| { |
| "loss": 1.502, |
| "grad_norm": 0.9385116696357727, |
| "learning_rate": 0.0001127962085308057, |
| "epoch": 0.11412268188302425, |
| "step": 120 |
| }, |
| { |
| "loss": 1.4808, |
| "grad_norm": 0.8207523226737976, |
| "learning_rate": 0.00011753554502369668, |
| "epoch": 0.11887779362815026, |
| "step": 125 |
| }, |
| { |
| "loss": 1.4963, |
| "grad_norm": 0.7325614094734192, |
| "learning_rate": 0.0001222748815165877, |
| "epoch": 0.12363290537327627, |
| "step": 130 |
| }, |
| { |
| "loss": 1.4897, |
| "grad_norm": 0.7729589939117432, |
| "learning_rate": 0.0001270142180094787, |
| "epoch": 0.12838801711840228, |
| "step": 135 |
| }, |
| { |
| "loss": 1.4906, |
| "grad_norm": 0.8618285655975342, |
| "learning_rate": 0.0001317535545023697, |
| "epoch": 0.1331431288635283, |
| "step": 140 |
| }, |
| { |
| "loss": 1.4451, |
| "grad_norm": 0.8186561465263367, |
| "learning_rate": 0.0001364928909952607, |
| "epoch": 0.1378982406086543, |
| "step": 145 |
| }, |
| { |
| "loss": 1.4308, |
| "grad_norm": 0.7613979578018188, |
| "learning_rate": 0.00014123222748815167, |
| "epoch": 0.14265335235378032, |
| "step": 150 |
| }, |
| { |
| "loss": 1.4268, |
| "grad_norm": 0.8574560284614563, |
| "learning_rate": 0.00014597156398104267, |
| "epoch": 0.14740846409890632, |
| "step": 155 |
| }, |
| { |
| "loss": 1.4481, |
| "grad_norm": 0.9759908318519592, |
| "learning_rate": 0.00015071090047393367, |
| "epoch": 0.15216357584403234, |
| "step": 160 |
| }, |
| { |
| "loss": 1.463, |
| "grad_norm": 0.8282021284103394, |
| "learning_rate": 0.00015545023696682465, |
| "epoch": 0.15691868758915833, |
| "step": 165 |
| }, |
| { |
| "loss": 1.4146, |
| "grad_norm": 0.8971031308174133, |
| "learning_rate": 0.00016018957345971565, |
| "epoch": 0.16167379933428436, |
| "step": 170 |
| }, |
| { |
| "loss": 1.4248, |
| "grad_norm": 0.7906368374824524, |
| "learning_rate": 0.00016492890995260665, |
| "epoch": 0.16642891107941038, |
| "step": 175 |
| }, |
| { |
| "loss": 1.4361, |
| "grad_norm": 0.757980465888977, |
| "learning_rate": 0.00016966824644549762, |
| "epoch": 0.17118402282453637, |
| "step": 180 |
| }, |
| { |
| "loss": 1.4175, |
| "grad_norm": 0.7388699650764465, |
| "learning_rate": 0.00017440758293838863, |
| "epoch": 0.1759391345696624, |
| "step": 185 |
| }, |
| { |
| "loss": 1.4038, |
| "grad_norm": 0.7854629755020142, |
| "learning_rate": 0.00017914691943127963, |
| "epoch": 0.1806942463147884, |
| "step": 190 |
| }, |
| { |
| "loss": 1.4266, |
| "grad_norm": 0.7242233753204346, |
| "learning_rate": 0.0001838862559241706, |
| "epoch": 0.18544935805991442, |
| "step": 195 |
| }, |
| { |
| "loss": 1.4374, |
| "grad_norm": 0.6643710136413574, |
| "learning_rate": 0.0001886255924170616, |
| "epoch": 0.1902044698050404, |
| "step": 200 |
| }, |
| { |
| "loss": 1.3591, |
| "grad_norm": 0.7907650470733643, |
| "learning_rate": 0.0001933649289099526, |
| "epoch": 0.19495958155016643, |
| "step": 205 |
| }, |
| { |
| "loss": 1.3909, |
| "grad_norm": 0.6639952063560486, |
| "learning_rate": 0.0001981042654028436, |
| "epoch": 0.19971469329529243, |
| "step": 210 |
| }, |
| { |
| "loss": 1.4192, |
| "grad_norm": 0.7232818603515625, |
| "learning_rate": 0.0001999997220006622, |
| "epoch": 0.20446980504041845, |
| "step": 215 |
| }, |
| { |
| "loss": 1.3741, |
| "grad_norm": 0.7904821038246155, |
| "learning_rate": 0.0001999980231214176, |
| "epoch": 0.20922491678554447, |
| "step": 220 |
| }, |
| { |
| "loss": 1.3645, |
| "grad_norm": 0.7556551098823547, |
| "learning_rate": 0.00019999477983321127, |
| "epoch": 0.21398002853067047, |
| "step": 225 |
| }, |
| { |
| "loss": 1.3785, |
| "grad_norm": 0.764137864112854, |
| "learning_rate": 0.0001999899921861339, |
| "epoch": 0.2187351402757965, |
| "step": 230 |
| }, |
| { |
| "loss": 1.3675, |
| "grad_norm": 0.6528745889663696, |
| "learning_rate": 0.00019998366025412772, |
| "epoch": 0.2234902520209225, |
| "step": 235 |
| }, |
| { |
| "loss": 1.375, |
| "grad_norm": 0.6686012148857117, |
| "learning_rate": 0.00019997578413498564, |
| "epoch": 0.2282453637660485, |
| "step": 240 |
| }, |
| { |
| "loss": 1.3973, |
| "grad_norm": 0.6628036499023438, |
| "learning_rate": 0.00019996636395034958, |
| "epoch": 0.2330004755111745, |
| "step": 245 |
| }, |
| { |
| "loss": 1.3779, |
| "grad_norm": 0.7726995944976807, |
| "learning_rate": 0.00019995539984570855, |
| "epoch": 0.23775558725630053, |
| "step": 250 |
| }, |
| { |
| "loss": 1.372, |
| "grad_norm": 0.7705885171890259, |
| "learning_rate": 0.00019994289199039652, |
| "epoch": 0.24251069900142652, |
| "step": 255 |
| }, |
| { |
| "loss": 1.335, |
| "grad_norm": 0.6389305591583252, |
| "learning_rate": 0.00019992884057758973, |
| "epoch": 0.24726581074655254, |
| "step": 260 |
| }, |
| { |
| "loss": 1.3496, |
| "grad_norm": 0.6854783296585083, |
| "learning_rate": 0.00019991324582430378, |
| "epoch": 0.25202092249167857, |
| "step": 265 |
| }, |
| { |
| "loss": 1.3527, |
| "grad_norm": 0.6458051800727844, |
| "learning_rate": 0.00019989610797139018, |
| "epoch": 0.25677603423680456, |
| "step": 270 |
| }, |
| { |
| "loss": 1.3577, |
| "grad_norm": 0.6870977282524109, |
| "learning_rate": 0.00019987742728353274, |
| "epoch": 0.26153114598193056, |
| "step": 275 |
| }, |
| { |
| "loss": 1.3241, |
| "grad_norm": 0.6708517670631409, |
| "learning_rate": 0.00019985720404924328, |
| "epoch": 0.2662862577270566, |
| "step": 280 |
| }, |
| { |
| "loss": 1.3452, |
| "grad_norm": 0.7562671899795532, |
| "learning_rate": 0.00019983543858085754, |
| "epoch": 0.2710413694721826, |
| "step": 285 |
| }, |
| { |
| "loss": 1.3392, |
| "grad_norm": 0.6730847954750061, |
| "learning_rate": 0.00019981213121452994, |
| "epoch": 0.2757964812173086, |
| "step": 290 |
| }, |
| { |
| "loss": 1.3525, |
| "grad_norm": 0.6527723670005798, |
| "learning_rate": 0.00019978728231022865, |
| "epoch": 0.2805515929624346, |
| "step": 295 |
| }, |
| { |
| "loss": 1.3287, |
| "grad_norm": 0.7036499381065369, |
| "learning_rate": 0.00019976089225173, |
| "epoch": 0.28530670470756064, |
| "step": 300 |
| }, |
| { |
| "loss": 1.3333, |
| "grad_norm": 0.6516985297203064, |
| "learning_rate": 0.00019973296144661245, |
| "epoch": 0.29006181645268664, |
| "step": 305 |
| }, |
| { |
| "loss": 1.3329, |
| "grad_norm": 0.6009252667427063, |
| "learning_rate": 0.00019970349032625035, |
| "epoch": 0.29481692819781263, |
| "step": 310 |
| }, |
| { |
| "loss": 1.3076, |
| "grad_norm": 0.6210832595825195, |
| "learning_rate": 0.00019967247934580737, |
| "epoch": 0.2995720399429387, |
| "step": 315 |
| }, |
| { |
| "loss": 1.3423, |
| "grad_norm": 0.7375179529190063, |
| "learning_rate": 0.00019963992898422935, |
| "epoch": 0.3043271516880647, |
| "step": 320 |
| }, |
| { |
| "loss": 1.3269, |
| "grad_norm": 0.6299087405204773, |
| "learning_rate": 0.0001996058397442369, |
| "epoch": 0.3090822634331907, |
| "step": 325 |
| }, |
| { |
| "loss": 1.317, |
| "grad_norm": 0.6354871392250061, |
| "learning_rate": 0.0001995702121523177, |
| "epoch": 0.31383737517831667, |
| "step": 330 |
| }, |
| { |
| "loss": 1.3015, |
| "grad_norm": 0.5692774653434753, |
| "learning_rate": 0.00019953304675871835, |
| "epoch": 0.3185924869234427, |
| "step": 335 |
| }, |
| { |
| "loss": 1.3097, |
| "grad_norm": 0.5829651951789856, |
| "learning_rate": 0.0001994943441374359, |
| "epoch": 0.3233475986685687, |
| "step": 340 |
| }, |
| { |
| "loss": 1.3125, |
| "grad_norm": 0.6025662422180176, |
| "learning_rate": 0.0001994541048862089, |
| "epoch": 0.3281027104136947, |
| "step": 345 |
| }, |
| { |
| "loss": 1.3381, |
| "grad_norm": 0.6604218482971191, |
| "learning_rate": 0.00019941232962650824, |
| "epoch": 0.33285782215882076, |
| "step": 350 |
| }, |
| { |
| "loss": 1.3159, |
| "grad_norm": 0.6200311779975891, |
| "learning_rate": 0.0001993690190035275, |
| "epoch": 0.33761293390394675, |
| "step": 355 |
| }, |
| { |
| "loss": 1.296, |
| "grad_norm": 0.622523307800293, |
| "learning_rate": 0.0001993241736861731, |
| "epoch": 0.34236804564907275, |
| "step": 360 |
| }, |
| { |
| "loss": 1.2865, |
| "grad_norm": 0.5812192559242249, |
| "learning_rate": 0.00019927779436705375, |
| "epoch": 0.34712315739419874, |
| "step": 365 |
| }, |
| { |
| "loss": 1.3121, |
| "grad_norm": 0.5977731943130493, |
| "learning_rate": 0.00019922988176247007, |
| "epoch": 0.3518782691393248, |
| "step": 370 |
| }, |
| { |
| "loss": 1.2837, |
| "grad_norm": 0.6188873648643494, |
| "learning_rate": 0.00019918043661240321, |
| "epoch": 0.3566333808844508, |
| "step": 375 |
| }, |
| { |
| "loss": 1.2902, |
| "grad_norm": 0.6301112771034241, |
| "learning_rate": 0.00019912945968050355, |
| "epoch": 0.3613884926295768, |
| "step": 380 |
| }, |
| { |
| "loss": 1.302, |
| "grad_norm": 0.6640788912773132, |
| "learning_rate": 0.00019907695175407902, |
| "epoch": 0.3661436043747028, |
| "step": 385 |
| }, |
| { |
| "loss": 1.3021, |
| "grad_norm": 0.61163729429245, |
| "learning_rate": 0.00019902291364408273, |
| "epoch": 0.37089871611982883, |
| "step": 390 |
| }, |
| { |
| "loss": 1.302, |
| "grad_norm": 0.5758511424064636, |
| "learning_rate": 0.0001989673461851006, |
| "epoch": 0.3756538278649548, |
| "step": 395 |
| }, |
| { |
| "loss": 1.291, |
| "grad_norm": 0.5665921568870544, |
| "learning_rate": 0.00019891025023533848, |
| "epoch": 0.3804089396100808, |
| "step": 400 |
| }, |
| { |
| "loss": 1.2941, |
| "grad_norm": 0.6631971001625061, |
| "learning_rate": 0.0001988516266766087, |
| "epoch": 0.38516405135520687, |
| "step": 405 |
| }, |
| { |
| "loss": 1.3179, |
| "grad_norm": 0.6042570471763611, |
| "learning_rate": 0.0001987914764143166, |
| "epoch": 0.38991916310033287, |
| "step": 410 |
| }, |
| { |
| "loss": 1.263, |
| "grad_norm": 0.5800480246543884, |
| "learning_rate": 0.0001987298003774467, |
| "epoch": 0.39467427484545886, |
| "step": 415 |
| }, |
| { |
| "loss": 1.2983, |
| "grad_norm": 0.5521400570869446, |
| "learning_rate": 0.000198666599518548, |
| "epoch": 0.39942938659058486, |
| "step": 420 |
| }, |
| { |
| "loss": 1.2512, |
| "grad_norm": 0.6025983095169067, |
| "learning_rate": 0.00019860187481371956, |
| "epoch": 0.4041844983357109, |
| "step": 425 |
| }, |
| { |
| "loss": 1.2727, |
| "grad_norm": 0.6759335398674011, |
| "learning_rate": 0.00019853562726259525, |
| "epoch": 0.4089396100808369, |
| "step": 430 |
| }, |
| { |
| "loss": 1.262, |
| "grad_norm": 0.5532102584838867, |
| "learning_rate": 0.00019846785788832843, |
| "epoch": 0.4136947218259629, |
| "step": 435 |
| }, |
| { |
| "loss": 1.2641, |
| "grad_norm": 0.6512478590011597, |
| "learning_rate": 0.0001983985677375761, |
| "epoch": 0.41844983357108895, |
| "step": 440 |
| }, |
| { |
| "loss": 1.2709, |
| "grad_norm": 0.6183508038520813, |
| "learning_rate": 0.0001983277578804827, |
| "epoch": 0.42320494531621494, |
| "step": 445 |
| }, |
| { |
| "loss": 1.2539, |
| "grad_norm": 0.5382626056671143, |
| "learning_rate": 0.0001982554294106636, |
| "epoch": 0.42796005706134094, |
| "step": 450 |
| }, |
| { |
| "loss": 1.2787, |
| "grad_norm": 0.6323602199554443, |
| "learning_rate": 0.00019818158344518829, |
| "epoch": 0.43271516880646693, |
| "step": 455 |
| }, |
| { |
| "loss": 1.2737, |
| "grad_norm": 0.5680716633796692, |
| "learning_rate": 0.000198106221124563, |
| "epoch": 0.437470280551593, |
| "step": 460 |
| }, |
| { |
| "loss": 1.2818, |
| "grad_norm": 0.5795097351074219, |
| "learning_rate": 0.00019802934361271324, |
| "epoch": 0.442225392296719, |
| "step": 465 |
| }, |
| { |
| "loss": 1.2555, |
| "grad_norm": 0.6733765602111816, |
| "learning_rate": 0.00019795095209696554, |
| "epoch": 0.446980504041845, |
| "step": 470 |
| }, |
| { |
| "loss": 1.2307, |
| "grad_norm": 0.5949432253837585, |
| "learning_rate": 0.00019787104778802955, |
| "epoch": 0.45173561578697097, |
| "step": 475 |
| }, |
| { |
| "loss": 1.2638, |
| "grad_norm": 0.658086359500885, |
| "learning_rate": 0.00019778963191997888, |
| "epoch": 0.456490727532097, |
| "step": 480 |
| }, |
| { |
| "loss": 1.2706, |
| "grad_norm": 0.5770449638366699, |
| "learning_rate": 0.00019770670575023242, |
| "epoch": 0.461245839277223, |
| "step": 485 |
| }, |
| { |
| "loss": 1.2479, |
| "grad_norm": 0.6657362580299377, |
| "learning_rate": 0.00019762227055953463, |
| "epoch": 0.466000951022349, |
| "step": 490 |
| }, |
| { |
| "loss": 1.2579, |
| "grad_norm": 0.6296879649162292, |
| "learning_rate": 0.000197536327651936, |
| "epoch": 0.47075606276747506, |
| "step": 495 |
| }, |
| { |
| "loss": 1.2476, |
| "grad_norm": 0.5844238996505737, |
| "learning_rate": 0.00019744887835477267, |
| "epoch": 0.47551117451260105, |
| "step": 500 |
| }, |
| { |
| "loss": 1.2198, |
| "grad_norm": 0.556000828742981, |
| "learning_rate": 0.0001973599240186462, |
| "epoch": 0.48026628625772705, |
| "step": 505 |
| }, |
| { |
| "loss": 1.2741, |
| "grad_norm": 0.5684508085250854, |
| "learning_rate": 0.00019726946601740243, |
| "epoch": 0.48502139800285304, |
| "step": 510 |
| }, |
| { |
| "loss": 1.2867, |
| "grad_norm": 0.6184570789337158, |
| "learning_rate": 0.00019717750574811048, |
| "epoch": 0.4897765097479791, |
| "step": 515 |
| }, |
| { |
| "loss": 1.2491, |
| "grad_norm": 0.5740643739700317, |
| "learning_rate": 0.00019708404463104104, |
| "epoch": 0.4945316214931051, |
| "step": 520 |
| }, |
| { |
| "loss": 1.255, |
| "grad_norm": 0.5657426714897156, |
| "learning_rate": 0.00019698908410964455, |
| "epoch": 0.4992867332382311, |
| "step": 525 |
| }, |
| { |
| "loss": 1.2568, |
| "grad_norm": 0.554580807685852, |
| "learning_rate": 0.0001968926256505288, |
| "epoch": 0.5040418449833571, |
| "step": 530 |
| }, |
| { |
| "loss": 1.267, |
| "grad_norm": 0.5561890006065369, |
| "learning_rate": 0.00019679467074343627, |
| "epoch": 0.5087969567284831, |
| "step": 535 |
| }, |
| { |
| "loss": 1.236, |
| "grad_norm": 0.620495080947876, |
| "learning_rate": 0.0001966952209012213, |
| "epoch": 0.5135520684736091, |
| "step": 540 |
| }, |
| { |
| "loss": 1.2299, |
| "grad_norm": 0.5489751696586609, |
| "learning_rate": 0.00019659427765982652, |
| "epoch": 0.5183071802187351, |
| "step": 545 |
| }, |
| { |
| "loss": 1.2411, |
| "grad_norm": 0.6042281985282898, |
| "learning_rate": 0.0001964918425782592, |
| "epoch": 0.5230622919638611, |
| "step": 550 |
| }, |
| { |
| "loss": 1.2472, |
| "grad_norm": 0.5573949217796326, |
| "learning_rate": 0.0001963879172385672, |
| "epoch": 0.5278174037089871, |
| "step": 555 |
| }, |
| { |
| "loss": 1.2394, |
| "grad_norm": 0.5466697216033936, |
| "learning_rate": 0.00019628250324581448, |
| "epoch": 0.5325725154541132, |
| "step": 560 |
| }, |
| { |
| "loss": 1.2296, |
| "grad_norm": 0.6046185493469238, |
| "learning_rate": 0.00019617560222805642, |
| "epoch": 0.5373276271992392, |
| "step": 565 |
| }, |
| { |
| "loss": 1.231, |
| "grad_norm": 0.610977053642273, |
| "learning_rate": 0.00019606721583631455, |
| "epoch": 0.5420827389443652, |
| "step": 570 |
| }, |
| { |
| "loss": 1.2303, |
| "grad_norm": 0.6490219831466675, |
| "learning_rate": 0.00019595734574455107, |
| "epoch": 0.5468378506894912, |
| "step": 575 |
| }, |
| { |
| "loss": 1.2644, |
| "grad_norm": 0.6017969250679016, |
| "learning_rate": 0.0001958459936496431, |
| "epoch": 0.5515929624346172, |
| "step": 580 |
| }, |
| { |
| "loss": 1.2217, |
| "grad_norm": 0.5338231921195984, |
| "learning_rate": 0.0001957331612713564, |
| "epoch": 0.5563480741797432, |
| "step": 585 |
| }, |
| { |
| "loss": 1.2383, |
| "grad_norm": 0.5838634371757507, |
| "learning_rate": 0.00019561885035231872, |
| "epoch": 0.5611031859248692, |
| "step": 590 |
| }, |
| { |
| "loss": 1.2597, |
| "grad_norm": 0.6423035264015198, |
| "learning_rate": 0.0001955030626579931, |
| "epoch": 0.5658582976699953, |
| "step": 595 |
| }, |
| { |
| "loss": 1.2374, |
| "grad_norm": 0.5716623663902283, |
| "learning_rate": 0.00019538579997665037, |
| "epoch": 0.5706134094151213, |
| "step": 600 |
| }, |
| { |
| "loss": 1.2098, |
| "grad_norm": 0.5534536838531494, |
| "learning_rate": 0.0001952670641193417, |
| "epoch": 0.5753685211602473, |
| "step": 605 |
| }, |
| { |
| "loss": 1.2401, |
| "grad_norm": 0.5755789875984192, |
| "learning_rate": 0.00019514685691987058, |
| "epoch": 0.5801236329053733, |
| "step": 610 |
| }, |
| { |
| "loss": 1.2106, |
| "grad_norm": 0.5546795725822449, |
| "learning_rate": 0.00019502518023476446, |
| "epoch": 0.5848787446504993, |
| "step": 615 |
| }, |
| { |
| "loss": 1.2454, |
| "grad_norm": 0.5658919215202332, |
| "learning_rate": 0.00019490203594324615, |
| "epoch": 0.5896338563956253, |
| "step": 620 |
| }, |
| { |
| "loss": 1.2363, |
| "grad_norm": 0.567059338092804, |
| "learning_rate": 0.00019477742594720468, |
| "epoch": 0.5943889681407513, |
| "step": 625 |
| }, |
| { |
| "loss": 1.2315, |
| "grad_norm": 0.5887007117271423, |
| "learning_rate": 0.00019465135217116607, |
| "epoch": 0.5991440798858774, |
| "step": 630 |
| }, |
| { |
| "loss": 1.2391, |
| "grad_norm": 0.5837830305099487, |
| "learning_rate": 0.00019452381656226347, |
| "epoch": 0.6038991916310034, |
| "step": 635 |
| }, |
| { |
| "loss": 1.2253, |
| "grad_norm": 0.5108605027198792, |
| "learning_rate": 0.00019439482109020721, |
| "epoch": 0.6086543033761294, |
| "step": 640 |
| }, |
| { |
| "loss": 1.2244, |
| "grad_norm": 0.5938732028007507, |
| "learning_rate": 0.00019426436774725434, |
| "epoch": 0.6134094151212554, |
| "step": 645 |
| }, |
| { |
| "loss": 1.2234, |
| "grad_norm": 0.5782492160797119, |
| "learning_rate": 0.0001941324585481778, |
| "epoch": 0.6181645268663813, |
| "step": 650 |
| }, |
| { |
| "loss": 1.2453, |
| "grad_norm": 0.5599561333656311, |
| "learning_rate": 0.0001939990955302353, |
| "epoch": 0.6229196386115073, |
| "step": 655 |
| }, |
| { |
| "loss": 1.2145, |
| "grad_norm": 0.5237142443656921, |
| "learning_rate": 0.000193864280753138, |
| "epoch": 0.6276747503566333, |
| "step": 660 |
| }, |
| { |
| "loss": 1.2224, |
| "grad_norm": 0.5659124851226807, |
| "learning_rate": 0.00019372801629901858, |
| "epoch": 0.6324298621017594, |
| "step": 665 |
| }, |
| { |
| "loss": 1.2063, |
| "grad_norm": 0.634732186794281, |
| "learning_rate": 0.00019359030427239905, |
| "epoch": 0.6371849738468854, |
| "step": 670 |
| }, |
| { |
| "loss": 1.2426, |
| "grad_norm": 0.5839982032775879, |
| "learning_rate": 0.00019345114680015835, |
| "epoch": 0.6419400855920114, |
| "step": 675 |
| }, |
| { |
| "loss": 1.22, |
| "grad_norm": 0.5994731783866882, |
| "learning_rate": 0.00019331054603149943, |
| "epoch": 0.6466951973371374, |
| "step": 680 |
| }, |
| { |
| "loss": 1.2101, |
| "grad_norm": 0.5626839399337769, |
| "learning_rate": 0.00019316850413791608, |
| "epoch": 0.6514503090822634, |
| "step": 685 |
| }, |
| { |
| "loss": 1.2071, |
| "grad_norm": 0.5403054356575012, |
| "learning_rate": 0.00019302502331315947, |
| "epoch": 0.6562054208273894, |
| "step": 690 |
| }, |
| { |
| "loss": 1.2184, |
| "grad_norm": 0.5751729011535645, |
| "learning_rate": 0.00019288010577320406, |
| "epoch": 0.6609605325725154, |
| "step": 695 |
| }, |
| { |
| "loss": 1.2111, |
| "grad_norm": 0.5693508982658386, |
| "learning_rate": 0.00019273375375621353, |
| "epoch": 0.6657156443176415, |
| "step": 700 |
| }, |
| { |
| "loss": 1.2015, |
| "grad_norm": 0.5208821296691895, |
| "learning_rate": 0.00019258596952250633, |
| "epoch": 0.6704707560627675, |
| "step": 705 |
| }, |
| { |
| "loss": 1.2028, |
| "grad_norm": 0.5707488059997559, |
| "learning_rate": 0.00019243675535452048, |
| "epoch": 0.6752258678078935, |
| "step": 710 |
| }, |
| { |
| "loss": 1.2277, |
| "grad_norm": 0.5503364205360413, |
| "learning_rate": 0.0001922861135567785, |
| "epoch": 0.6799809795530195, |
| "step": 715 |
| }, |
| { |
| "loss": 1.2071, |
| "grad_norm": 0.5710814595222473, |
| "learning_rate": 0.00019213404645585183, |
| "epoch": 0.6847360912981455, |
| "step": 720 |
| }, |
| { |
| "loss": 1.2045, |
| "grad_norm": 0.5907149910926819, |
| "learning_rate": 0.00019198055640032484, |
| "epoch": 0.6894912030432715, |
| "step": 725 |
| }, |
| { |
| "loss": 1.2227, |
| "grad_norm": 0.5389401912689209, |
| "learning_rate": 0.0001918256457607586, |
| "epoch": 0.6942463147883975, |
| "step": 730 |
| }, |
| { |
| "loss": 1.2065, |
| "grad_norm": 0.5665199160575867, |
| "learning_rate": 0.0001916693169296541, |
| "epoch": 0.6990014265335235, |
| "step": 735 |
| }, |
| { |
| "loss": 1.1833, |
| "grad_norm": 0.5717663764953613, |
| "learning_rate": 0.00019151157232141566, |
| "epoch": 0.7037565382786496, |
| "step": 740 |
| }, |
| { |
| "loss": 1.1979, |
| "grad_norm": 0.5833514928817749, |
| "learning_rate": 0.00019135241437231323, |
| "epoch": 0.7085116500237756, |
| "step": 745 |
| }, |
| { |
| "loss": 1.199, |
| "grad_norm": 0.5985444188117981, |
| "learning_rate": 0.00019119184554044507, |
| "epoch": 0.7132667617689016, |
| "step": 750 |
| }, |
| { |
| "loss": 1.2197, |
| "grad_norm": 0.5858203172683716, |
| "learning_rate": 0.00019102986830569964, |
| "epoch": 0.7180218735140276, |
| "step": 755 |
| }, |
| { |
| "loss": 1.2008, |
| "grad_norm": 0.5281049013137817, |
| "learning_rate": 0.00019086648516971732, |
| "epoch": 0.7227769852591536, |
| "step": 760 |
| }, |
| { |
| "loss": 1.1973, |
| "grad_norm": 0.5883271098136902, |
| "learning_rate": 0.00019070169865585175, |
| "epoch": 0.7275320970042796, |
| "step": 765 |
| }, |
| { |
| "loss": 1.185, |
| "grad_norm": 0.5440351366996765, |
| "learning_rate": 0.000190535511309131, |
| "epoch": 0.7322872087494056, |
| "step": 770 |
| }, |
| { |
| "loss": 1.1824, |
| "grad_norm": 0.5481285452842712, |
| "learning_rate": 0.000190367925696218, |
| "epoch": 0.7370423204945317, |
| "step": 775 |
| }, |
| { |
| "loss": 1.1949, |
| "grad_norm": 0.5427468419075012, |
| "learning_rate": 0.0001901989444053712, |
| "epoch": 0.7417974322396577, |
| "step": 780 |
| }, |
| { |
| "loss": 1.2113, |
| "grad_norm": 0.5643748641014099, |
| "learning_rate": 0.00019002857004640438, |
| "epoch": 0.7465525439847837, |
| "step": 785 |
| }, |
| { |
| "loss": 1.1838, |
| "grad_norm": 0.5325281620025635, |
| "learning_rate": 0.00018985680525064643, |
| "epoch": 0.7513076557299097, |
| "step": 790 |
| }, |
| { |
| "loss": 1.1839, |
| "grad_norm": 0.5137047171592712, |
| "learning_rate": 0.00018968365267090068, |
| "epoch": 0.7560627674750356, |
| "step": 795 |
| }, |
| { |
| "loss": 1.2114, |
| "grad_norm": 0.5573691129684448, |
| "learning_rate": 0.00018950911498140396, |
| "epoch": 0.7608178792201616, |
| "step": 800 |
| }, |
| { |
| "loss": 1.2038, |
| "grad_norm": 0.5285890102386475, |
| "learning_rate": 0.00018933319487778527, |
| "epoch": 0.7655729909652876, |
| "step": 805 |
| }, |
| { |
| "loss": 1.2191, |
| "grad_norm": 0.5503642559051514, |
| "learning_rate": 0.0001891558950770242, |
| "epoch": 0.7703281027104137, |
| "step": 810 |
| }, |
| { |
| "loss": 1.1623, |
| "grad_norm": 0.5187557339668274, |
| "learning_rate": 0.00018897721831740886, |
| "epoch": 0.7750832144555397, |
| "step": 815 |
| }, |
| { |
| "loss": 1.1683, |
| "grad_norm": 0.5442098379135132, |
| "learning_rate": 0.0001887971673584937, |
| "epoch": 0.7798383262006657, |
| "step": 820 |
| }, |
| { |
| "loss": 1.1965, |
| "grad_norm": 0.6257349848747253, |
| "learning_rate": 0.00018861574498105682, |
| "epoch": 0.7845934379457917, |
| "step": 825 |
| }, |
| { |
| "loss": 1.1869, |
| "grad_norm": 0.5780654549598694, |
| "learning_rate": 0.00018843295398705712, |
| "epoch": 0.7893485496909177, |
| "step": 830 |
| }, |
| { |
| "loss": 1.1768, |
| "grad_norm": 0.5473431348800659, |
| "learning_rate": 0.00018824879719959086, |
| "epoch": 0.7941036614360437, |
| "step": 835 |
| }, |
| { |
| "loss": 1.1961, |
| "grad_norm": 0.559974730014801, |
| "learning_rate": 0.00018806327746284817, |
| "epoch": 0.7988587731811697, |
| "step": 840 |
| }, |
| { |
| "loss": 1.1988, |
| "grad_norm": 2.5416524410247803, |
| "learning_rate": 0.00018787639764206922, |
| "epoch": 0.8036138849262958, |
| "step": 845 |
| }, |
| { |
| "loss": 1.1939, |
| "grad_norm": 0.535737931728363, |
| "learning_rate": 0.00018768816062349967, |
| "epoch": 0.8083689966714218, |
| "step": 850 |
| }, |
| { |
| "loss": 1.207, |
| "grad_norm": 0.5344818234443665, |
| "learning_rate": 0.00018749856931434645, |
| "epoch": 0.8131241084165478, |
| "step": 855 |
| }, |
| { |
| "loss": 1.1776, |
| "grad_norm": 0.5346983671188354, |
| "learning_rate": 0.00018730762664273255, |
| "epoch": 0.8178792201616738, |
| "step": 860 |
| }, |
| { |
| "loss": 1.1882, |
| "grad_norm": 0.5402603149414062, |
| "learning_rate": 0.00018711533555765207, |
| "epoch": 0.8226343319067998, |
| "step": 865 |
| }, |
| { |
| "loss": 1.1664, |
| "grad_norm": 0.6313158869743347, |
| "learning_rate": 0.00018692169902892442, |
| "epoch": 0.8273894436519258, |
| "step": 870 |
| }, |
| { |
| "loss": 1.1791, |
| "grad_norm": 0.5297380685806274, |
| "learning_rate": 0.00018672672004714863, |
| "epoch": 0.8321445553970518, |
| "step": 875 |
| }, |
| { |
| "loss": 1.1847, |
| "grad_norm": 0.5445675849914551, |
| "learning_rate": 0.0001865304016236571, |
| "epoch": 0.8368996671421779, |
| "step": 880 |
| }, |
| { |
| "loss": 1.1782, |
| "grad_norm": 0.5190274119377136, |
| "learning_rate": 0.0001863327467904691, |
| "epoch": 0.8416547788873039, |
| "step": 885 |
| }, |
| { |
| "loss": 1.1906, |
| "grad_norm": 0.5527743101119995, |
| "learning_rate": 0.00018613375860024396, |
| "epoch": 0.8464098906324299, |
| "step": 890 |
| }, |
| { |
| "loss": 1.2029, |
| "grad_norm": 0.5394300222396851, |
| "learning_rate": 0.00018593344012623385, |
| "epoch": 0.8511650023775559, |
| "step": 895 |
| }, |
| { |
| "loss": 1.1878, |
| "grad_norm": 0.5860111713409424, |
| "learning_rate": 0.00018573179446223636, |
| "epoch": 0.8559201141226819, |
| "step": 900 |
| }, |
| { |
| "loss": 1.1655, |
| "grad_norm": 0.5082983374595642, |
| "learning_rate": 0.00018552882472254684, |
| "epoch": 0.8606752258678079, |
| "step": 905 |
| }, |
| { |
| "loss": 1.1792, |
| "grad_norm": 0.5140711069107056, |
| "learning_rate": 0.00018532453404191004, |
| "epoch": 0.8654303376129339, |
| "step": 910 |
| }, |
| { |
| "loss": 1.208, |
| "grad_norm": 0.6072941422462463, |
| "learning_rate": 0.00018511892557547194, |
| "epoch": 0.8701854493580599, |
| "step": 915 |
| }, |
| { |
| "loss": 1.1941, |
| "grad_norm": 0.5361263155937195, |
| "learning_rate": 0.00018491200249873093, |
| "epoch": 0.874940561103186, |
| "step": 920 |
| }, |
| { |
| "loss": 1.1662, |
| "grad_norm": 0.5434235334396362, |
| "learning_rate": 0.00018470376800748867, |
| "epoch": 0.879695672848312, |
| "step": 925 |
| }, |
| { |
| "loss": 1.1851, |
| "grad_norm": 0.5230010151863098, |
| "learning_rate": 0.00018449422531780093, |
| "epoch": 0.884450784593438, |
| "step": 930 |
| }, |
| { |
| "loss": 1.1724, |
| "grad_norm": 0.5736663937568665, |
| "learning_rate": 0.0001842833776659277, |
| "epoch": 0.889205896338564, |
| "step": 935 |
| }, |
| { |
| "loss": 1.1996, |
| "grad_norm": 0.5705561637878418, |
| "learning_rate": 0.0001840712283082835, |
| "epoch": 0.89396100808369, |
| "step": 940 |
| }, |
| { |
| "loss": 1.1705, |
| "grad_norm": 0.533709704875946, |
| "learning_rate": 0.00018385778052138667, |
| "epoch": 0.8987161198288159, |
| "step": 945 |
| }, |
| { |
| "loss": 1.1765, |
| "grad_norm": 0.5367459058761597, |
| "learning_rate": 0.00018364303760180922, |
| "epoch": 0.9034712315739419, |
| "step": 950 |
| }, |
| { |
| "loss": 1.1733, |
| "grad_norm": 0.5386838316917419, |
| "learning_rate": 0.00018342700286612553, |
| "epoch": 0.908226343319068, |
| "step": 955 |
| }, |
| { |
| "loss": 1.1905, |
| "grad_norm": 0.5734283328056335, |
| "learning_rate": 0.00018320967965086146, |
| "epoch": 0.912981455064194, |
| "step": 960 |
| }, |
| { |
| "loss": 1.183, |
| "grad_norm": 0.6564159989356995, |
| "learning_rate": 0.00018299107131244252, |
| "epoch": 0.91773656680932, |
| "step": 965 |
| }, |
| { |
| "loss": 1.18, |
| "grad_norm": 0.5927287340164185, |
| "learning_rate": 0.0001827711812271422, |
| "epoch": 0.922491678554446, |
| "step": 970 |
| }, |
| { |
| "loss": 1.1867, |
| "grad_norm": 0.6005284190177917, |
| "learning_rate": 0.00018255001279102992, |
| "epoch": 0.927246790299572, |
| "step": 975 |
| }, |
| { |
| "loss": 1.1782, |
| "grad_norm": 0.5389478802680969, |
| "learning_rate": 0.00018232756941991826, |
| "epoch": 0.932001902044698, |
| "step": 980 |
| }, |
| { |
| "loss": 1.1573, |
| "grad_norm": 0.5602630972862244, |
| "learning_rate": 0.00018210385454931055, |
| "epoch": 0.936757013789824, |
| "step": 985 |
| }, |
| { |
| "loss": 1.178, |
| "grad_norm": 0.5245372653007507, |
| "learning_rate": 0.00018187887163434753, |
| "epoch": 0.9415121255349501, |
| "step": 990 |
| }, |
| { |
| "loss": 1.1478, |
| "grad_norm": 0.6094080209732056, |
| "learning_rate": 0.00018165262414975432, |
| "epoch": 0.9462672372800761, |
| "step": 995 |
| }, |
| { |
| "loss": 1.1932, |
| "grad_norm": 0.5100232362747192, |
| "learning_rate": 0.00018142511558978628, |
| "epoch": 0.9510223490252021, |
| "step": 1000 |
| }, |
| { |
| "loss": 1.1704, |
| "grad_norm": 0.5068570971488953, |
| "learning_rate": 0.00018119634946817557, |
| "epoch": 0.9557774607703281, |
| "step": 1005 |
| }, |
| { |
| "loss": 1.1742, |
| "grad_norm": 0.5550134181976318, |
| "learning_rate": 0.00018096632931807643, |
| "epoch": 0.9605325725154541, |
| "step": 1010 |
| }, |
| { |
| "loss": 1.1908, |
| "grad_norm": 0.5453625321388245, |
| "learning_rate": 0.000180735058692011, |
| "epoch": 0.9652876842605801, |
| "step": 1015 |
| }, |
| { |
| "loss": 1.1792, |
| "grad_norm": 0.5452067852020264, |
| "learning_rate": 0.00018050254116181412, |
| "epoch": 0.9700427960057061, |
| "step": 1020 |
| }, |
| { |
| "loss": 1.1733, |
| "grad_norm": 0.539887547492981, |
| "learning_rate": 0.0001802687803185783, |
| "epoch": 0.9747979077508322, |
| "step": 1025 |
| }, |
| { |
| "loss": 1.2122, |
| "grad_norm": 0.563098132610321, |
| "learning_rate": 0.00018003377977259842, |
| "epoch": 0.9795530194959582, |
| "step": 1030 |
| }, |
| { |
| "loss": 1.1833, |
| "grad_norm": 0.5663331747055054, |
| "learning_rate": 0.00017979754315331566, |
| "epoch": 0.9843081312410842, |
| "step": 1035 |
| }, |
| { |
| "loss": 1.1664, |
| "grad_norm": 0.5534679293632507, |
| "learning_rate": 0.00017956007410926166, |
| "epoch": 0.9890632429862102, |
| "step": 1040 |
| }, |
| { |
| "loss": 1.1751, |
| "grad_norm": 0.5449529886245728, |
| "learning_rate": 0.00017932137630800217, |
| "epoch": 0.9938183547313362, |
| "step": 1045 |
| }, |
| { |
| "loss": 1.174, |
| "grad_norm": 0.5402258038520813, |
| "learning_rate": 0.00017908145343608026, |
| "epoch": 0.9985734664764622, |
| "step": 1050 |
| }, |
| { |
| "loss": 1.1402, |
| "grad_norm": 0.6218162178993225, |
| "learning_rate": 0.00017884030919895953, |
| "epoch": 1.0028530670470757, |
| "step": 1055 |
| }, |
| { |
| "loss": 1.1192, |
| "grad_norm": 0.5757145285606384, |
| "learning_rate": 0.00017859794732096683, |
| "epoch": 1.0076081787922015, |
| "step": 1060 |
| }, |
| { |
| "loss": 1.1238, |
| "grad_norm": 0.6729576587677002, |
| "learning_rate": 0.0001783543715452347, |
| "epoch": 1.0123632905373277, |
| "step": 1065 |
| }, |
| { |
| "loss": 1.16, |
| "grad_norm": 0.5097976326942444, |
| "learning_rate": 0.00017810958563364363, |
| "epoch": 1.0171184022824535, |
| "step": 1070 |
| }, |
| { |
| "loss": 1.1443, |
| "grad_norm": 0.5340394973754883, |
| "learning_rate": 0.00017786359336676387, |
| "epoch": 1.0218735140275796, |
| "step": 1075 |
| }, |
| { |
| "loss": 1.138, |
| "grad_norm": 0.5654225945472717, |
| "learning_rate": 0.0001776163985437972, |
| "epoch": 1.0266286257727057, |
| "step": 1080 |
| }, |
| { |
| "loss": 1.1217, |
| "grad_norm": 0.5368727445602417, |
| "learning_rate": 0.0001773680049825181, |
| "epoch": 1.0313837375178316, |
| "step": 1085 |
| }, |
| { |
| "loss": 1.1311, |
| "grad_norm": 0.5356417894363403, |
| "learning_rate": 0.00017711841651921474, |
| "epoch": 1.0361388492629577, |
| "step": 1090 |
| }, |
| { |
| "loss": 1.1408, |
| "grad_norm": 0.5295414924621582, |
| "learning_rate": 0.00017686763700863002, |
| "epoch": 1.0408939610080836, |
| "step": 1095 |
| }, |
| { |
| "loss": 1.1378, |
| "grad_norm": 0.5422346591949463, |
| "learning_rate": 0.0001766156703239017, |
| "epoch": 1.0456490727532097, |
| "step": 1100 |
| }, |
| { |
| "loss": 1.1237, |
| "grad_norm": 0.551154613494873, |
| "learning_rate": 0.00017636252035650276, |
| "epoch": 1.0504041844983356, |
| "step": 1105 |
| }, |
| { |
| "loss": 1.1118, |
| "grad_norm": 0.509323239326477, |
| "learning_rate": 0.00017610819101618127, |
| "epoch": 1.0551592962434617, |
| "step": 1110 |
| }, |
| { |
| "loss": 1.134, |
| "grad_norm": 0.5362396836280823, |
| "learning_rate": 0.00017585268623090004, |
| "epoch": 1.0599144079885878, |
| "step": 1115 |
| }, |
| { |
| "loss": 1.1097, |
| "grad_norm": 0.5813230276107788, |
| "learning_rate": 0.00017559600994677586, |
| "epoch": 1.0646695197337137, |
| "step": 1120 |
| }, |
| { |
| "loss": 1.1201, |
| "grad_norm": 0.5430865287780762, |
| "learning_rate": 0.00017533816612801858, |
| "epoch": 1.0694246314788398, |
| "step": 1125 |
| }, |
| { |
| "loss": 1.1265, |
| "grad_norm": 0.5772477984428406, |
| "learning_rate": 0.00017507915875687009, |
| "epoch": 1.0741797432239657, |
| "step": 1130 |
| }, |
| { |
| "loss": 1.1378, |
| "grad_norm": 0.5706289410591125, |
| "learning_rate": 0.0001748189918335424, |
| "epoch": 1.0789348549690918, |
| "step": 1135 |
| }, |
| { |
| "loss": 1.1219, |
| "grad_norm": 0.5566823482513428, |
| "learning_rate": 0.0001745576693761563, |
| "epoch": 1.0836899667142177, |
| "step": 1140 |
| }, |
| { |
| "loss": 1.1104, |
| "grad_norm": 0.6031088829040527, |
| "learning_rate": 0.00017429519542067897, |
| "epoch": 1.0884450784593438, |
| "step": 1145 |
| }, |
| { |
| "loss": 1.1156, |
| "grad_norm": 0.5697749257087708, |
| "learning_rate": 0.00017403157402086193, |
| "epoch": 1.09320019020447, |
| "step": 1150 |
| }, |
| { |
| "loss": 1.134, |
| "grad_norm": 0.5659533739089966, |
| "learning_rate": 0.00017376680924817807, |
| "epoch": 1.0979553019495958, |
| "step": 1155 |
| }, |
| { |
| "loss": 1.0961, |
| "grad_norm": 0.5105201005935669, |
| "learning_rate": 0.00017350090519175918, |
| "epoch": 1.102710413694722, |
| "step": 1160 |
| }, |
| { |
| "loss": 1.1404, |
| "grad_norm": 0.5230898857116699, |
| "learning_rate": 0.00017323386595833248, |
| "epoch": 1.1074655254398478, |
| "step": 1165 |
| }, |
| { |
| "loss": 1.0993, |
| "grad_norm": 0.5445020794868469, |
| "learning_rate": 0.00017296569567215743, |
| "epoch": 1.1122206371849739, |
| "step": 1170 |
| }, |
| { |
| "loss": 1.115, |
| "grad_norm": 0.5502917766571045, |
| "learning_rate": 0.00017269639847496175, |
| "epoch": 1.1169757489300998, |
| "step": 1175 |
| }, |
| { |
| "loss": 1.1062, |
| "grad_norm": 0.5155828595161438, |
| "learning_rate": 0.00017242597852587776, |
| "epoch": 1.1217308606752259, |
| "step": 1180 |
| }, |
| { |
| "loss": 1.0968, |
| "grad_norm": 0.5539115071296692, |
| "learning_rate": 0.00017215444000137796, |
| "epoch": 1.126485972420352, |
| "step": 1185 |
| }, |
| { |
| "loss": 1.1482, |
| "grad_norm": 0.5663743019104004, |
| "learning_rate": 0.00017188178709521052, |
| "epoch": 1.1312410841654779, |
| "step": 1190 |
| }, |
| { |
| "loss": 1.1361, |
| "grad_norm": 0.5471282601356506, |
| "learning_rate": 0.00017160802401833462, |
| "epoch": 1.135996195910604, |
| "step": 1195 |
| }, |
| { |
| "loss": 1.136, |
| "grad_norm": 0.5797370672225952, |
| "learning_rate": 0.00017133315499885535, |
| "epoch": 1.1407513076557299, |
| "step": 1200 |
| }, |
| { |
| "loss": 1.1097, |
| "grad_norm": 0.5269182920455933, |
| "learning_rate": 0.00017105718428195838, |
| "epoch": 1.145506419400856, |
| "step": 1205 |
| }, |
| { |
| "loss": 1.1198, |
| "grad_norm": 0.5525156259536743, |
| "learning_rate": 0.00017078011612984447, |
| "epoch": 1.1502615311459818, |
| "step": 1210 |
| }, |
| { |
| "loss": 1.124, |
| "grad_norm": 0.5491119027137756, |
| "learning_rate": 0.0001705019548216636, |
| "epoch": 1.155016642891108, |
| "step": 1215 |
| }, |
| { |
| "loss": 1.1429, |
| "grad_norm": 0.537158727645874, |
| "learning_rate": 0.00017022270465344883, |
| "epoch": 1.159771754636234, |
| "step": 1220 |
| }, |
| { |
| "loss": 1.119, |
| "grad_norm": 0.5533539056777954, |
| "learning_rate": 0.00016994236993805005, |
| "epoch": 1.16452686638136, |
| "step": 1225 |
| }, |
| { |
| "loss": 1.0961, |
| "grad_norm": 0.5583034157752991, |
| "learning_rate": 0.00016966095500506735, |
| "epoch": 1.169281978126486, |
| "step": 1230 |
| }, |
| { |
| "loss": 1.1256, |
| "grad_norm": 0.5393441915512085, |
| "learning_rate": 0.00016937846420078407, |
| "epoch": 1.174037089871612, |
| "step": 1235 |
| }, |
| { |
| "loss": 1.108, |
| "grad_norm": 0.5612584352493286, |
| "learning_rate": 0.00016909490188809974, |
| "epoch": 1.178792201616738, |
| "step": 1240 |
| }, |
| { |
| "loss": 1.1325, |
| "grad_norm": 0.9341227412223816, |
| "learning_rate": 0.00016881027244646277, |
| "epoch": 1.183547313361864, |
| "step": 1245 |
| }, |
| { |
| "loss": 1.1073, |
| "grad_norm": 0.5361753702163696, |
| "learning_rate": 0.00016852458027180261, |
| "epoch": 1.18830242510699, |
| "step": 1250 |
| }, |
| { |
| "loss": 1.1205, |
| "grad_norm": 0.5428937673568726, |
| "learning_rate": 0.00016823782977646204, |
| "epoch": 1.1930575368521161, |
| "step": 1255 |
| }, |
| { |
| "loss": 1.1476, |
| "grad_norm": 0.6153729557991028, |
| "learning_rate": 0.000167950025389129, |
| "epoch": 1.197812648597242, |
| "step": 1260 |
| }, |
| { |
| "loss": 1.0933, |
| "grad_norm": 0.5355603694915771, |
| "learning_rate": 0.00016766117155476802, |
| "epoch": 1.2025677603423681, |
| "step": 1265 |
| }, |
| { |
| "loss": 1.1007, |
| "grad_norm": 0.520135223865509, |
| "learning_rate": 0.00016737127273455186, |
| "epoch": 1.207322872087494, |
| "step": 1270 |
| }, |
| { |
| "loss": 1.1362, |
| "grad_norm": 0.560717761516571, |
| "learning_rate": 0.00016708033340579243, |
| "epoch": 1.21207798383262, |
| "step": 1275 |
| }, |
| { |
| "loss": 1.1057, |
| "grad_norm": 0.5831757187843323, |
| "learning_rate": 0.00016678835806187156, |
| "epoch": 1.216833095577746, |
| "step": 1280 |
| }, |
| { |
| "loss": 1.1099, |
| "grad_norm": 0.6481867432594299, |
| "learning_rate": 0.00016649535121217177, |
| "epoch": 1.221588207322872, |
| "step": 1285 |
| }, |
| { |
| "loss": 1.1255, |
| "grad_norm": 0.5575374960899353, |
| "learning_rate": 0.00016620131738200663, |
| "epoch": 1.2263433190679982, |
| "step": 1290 |
| }, |
| { |
| "loss": 1.1116, |
| "grad_norm": 0.5407826900482178, |
| "learning_rate": 0.00016590626111255064, |
| "epoch": 1.231098430813124, |
| "step": 1295 |
| }, |
| { |
| "loss": 1.1085, |
| "grad_norm": 0.5181446075439453, |
| "learning_rate": 0.00016561018696076944, |
| "epoch": 1.2358535425582502, |
| "step": 1300 |
| }, |
| { |
| "loss": 1.0851, |
| "grad_norm": 0.537004292011261, |
| "learning_rate": 0.00016531309949934916, |
| "epoch": 1.240608654303376, |
| "step": 1305 |
| }, |
| { |
| "loss": 1.126, |
| "grad_norm": 0.5328262448310852, |
| "learning_rate": 0.00016501500331662577, |
| "epoch": 1.2453637660485022, |
| "step": 1310 |
| }, |
| { |
| "loss": 1.1175, |
| "grad_norm": 0.5213992595672607, |
| "learning_rate": 0.0001647159030165146, |
| "epoch": 1.250118877793628, |
| "step": 1315 |
| }, |
| { |
| "loss": 1.0919, |
| "grad_norm": 0.5673755407333374, |
| "learning_rate": 0.00016441580321843873, |
| "epoch": 1.2548739895387542, |
| "step": 1320 |
| }, |
| { |
| "loss": 1.1141, |
| "grad_norm": 0.5928951501846313, |
| "learning_rate": 0.00016411470855725793, |
| "epoch": 1.2596291012838803, |
| "step": 1325 |
| }, |
| { |
| "loss": 1.1131, |
| "grad_norm": 0.531998872756958, |
| "learning_rate": 0.00016381262368319712, |
| "epoch": 1.2643842130290062, |
| "step": 1330 |
| }, |
| { |
| "loss": 1.1212, |
| "grad_norm": 0.5531988739967346, |
| "learning_rate": 0.00016350955326177436, |
| "epoch": 1.2691393247741323, |
| "step": 1335 |
| }, |
| { |
| "loss": 1.1177, |
| "grad_norm": 0.5095930099487305, |
| "learning_rate": 0.00016320550197372895, |
| "epoch": 1.2738944365192582, |
| "step": 1340 |
| }, |
| { |
| "loss": 1.1223, |
| "grad_norm": 0.5505598187446594, |
| "learning_rate": 0.00016290047451494906, |
| "epoch": 1.2786495482643843, |
| "step": 1345 |
| }, |
| { |
| "loss": 1.1132, |
| "grad_norm": 1.0145131349563599, |
| "learning_rate": 0.00016259447559639924, |
| "epoch": 1.2834046600095101, |
| "step": 1350 |
| }, |
| { |
| "loss": 1.1482, |
| "grad_norm": 0.5326014161109924, |
| "learning_rate": 0.00016228750994404765, |
| "epoch": 1.2881597717546363, |
| "step": 1355 |
| }, |
| { |
| "loss": 1.1181, |
| "grad_norm": 0.5488507151603699, |
| "learning_rate": 0.00016197958229879298, |
| "epoch": 1.2929148834997624, |
| "step": 1360 |
| }, |
| { |
| "loss": 1.105, |
| "grad_norm": 0.5284917950630188, |
| "learning_rate": 0.00016167069741639147, |
| "epoch": 1.2976699952448882, |
| "step": 1365 |
| }, |
| { |
| "loss": 1.1137, |
| "grad_norm": 0.5355532169342041, |
| "learning_rate": 0.0001613608600673832, |
| "epoch": 1.3024251069900143, |
| "step": 1370 |
| }, |
| { |
| "loss": 1.1092, |
| "grad_norm": 0.5276153087615967, |
| "learning_rate": 0.00016105007503701858, |
| "epoch": 1.3071802187351402, |
| "step": 1375 |
| }, |
| { |
| "loss": 1.1249, |
| "grad_norm": 0.5551221966743469, |
| "learning_rate": 0.00016073834712518431, |
| "epoch": 1.3119353304802663, |
| "step": 1380 |
| }, |
| { |
| "loss": 1.1017, |
| "grad_norm": 0.5625216364860535, |
| "learning_rate": 0.00016042568114632943, |
| "epoch": 1.3166904422253922, |
| "step": 1385 |
| }, |
| { |
| "loss": 1.1223, |
| "grad_norm": 0.5563182234764099, |
| "learning_rate": 0.00016011208192939078, |
| "epoch": 1.3214455539705183, |
| "step": 1390 |
| }, |
| { |
| "loss": 1.1108, |
| "grad_norm": 0.52386474609375, |
| "learning_rate": 0.00015979755431771848, |
| "epoch": 1.3262006657156444, |
| "step": 1395 |
| }, |
| { |
| "loss": 1.1126, |
| "grad_norm": 0.5781528353691101, |
| "learning_rate": 0.00015948210316900122, |
| "epoch": 1.3309557774607703, |
| "step": 1400 |
| }, |
| { |
| "loss": 1.1, |
| "grad_norm": 0.5766226649284363, |
| "learning_rate": 0.00015916573335519112, |
| "epoch": 1.3357108892058962, |
| "step": 1405 |
| }, |
| { |
| "loss": 1.1262, |
| "grad_norm": 0.5536102056503296, |
| "learning_rate": 0.00015884844976242853, |
| "epoch": 1.3404660009510223, |
| "step": 1410 |
| }, |
| { |
| "loss": 1.1323, |
| "grad_norm": 0.5428702235221863, |
| "learning_rate": 0.00015853025729096654, |
| "epoch": 1.3452211126961484, |
| "step": 1415 |
| }, |
| { |
| "loss": 1.0691, |
| "grad_norm": 0.5881286859512329, |
| "learning_rate": 0.0001582111608550954, |
| "epoch": 1.3499762244412743, |
| "step": 1420 |
| }, |
| { |
| "loss": 1.0971, |
| "grad_norm": 0.5376484394073486, |
| "learning_rate": 0.00015789116538306646, |
| "epoch": 1.3547313361864004, |
| "step": 1425 |
| }, |
| { |
| "loss": 1.1047, |
| "grad_norm": 0.5175846815109253, |
| "learning_rate": 0.00015757027581701612, |
| "epoch": 1.3594864479315265, |
| "step": 1430 |
| }, |
| { |
| "loss": 1.1124, |
| "grad_norm": 0.5348005890846252, |
| "learning_rate": 0.00015724849711288959, |
| "epoch": 1.3642415596766524, |
| "step": 1435 |
| }, |
| { |
| "loss": 1.1035, |
| "grad_norm": 0.517838716506958, |
| "learning_rate": 0.00015692583424036432, |
| "epoch": 1.3689966714217783, |
| "step": 1440 |
| }, |
| { |
| "loss": 1.0946, |
| "grad_norm": 0.5407333970069885, |
| "learning_rate": 0.00015660229218277306, |
| "epoch": 1.3737517831669044, |
| "step": 1445 |
| }, |
| { |
| "loss": 1.108, |
| "grad_norm": 0.5456969738006592, |
| "learning_rate": 0.0001562778759370272, |
| "epoch": 1.3785068949120305, |
| "step": 1450 |
| }, |
| { |
| "loss": 1.1109, |
| "grad_norm": 0.5390228033065796, |
| "learning_rate": 0.00015595259051353927, |
| "epoch": 1.3832620066571564, |
| "step": 1455 |
| }, |
| { |
| "loss": 1.1306, |
| "grad_norm": 0.5381757616996765, |
| "learning_rate": 0.0001556264409361459, |
| "epoch": 1.3880171184022825, |
| "step": 1460 |
| }, |
| { |
| "loss": 1.1052, |
| "grad_norm": 0.5398806929588318, |
| "learning_rate": 0.00015529943224202995, |
| "epoch": 1.3927722301474086, |
| "step": 1465 |
| }, |
| { |
| "loss": 1.1006, |
| "grad_norm": 0.5454993844032288, |
| "learning_rate": 0.00015497156948164284, |
| "epoch": 1.3975273418925345, |
| "step": 1470 |
| }, |
| { |
| "loss": 1.1044, |
| "grad_norm": 0.5269806385040283, |
| "learning_rate": 0.00015464285771862648, |
| "epoch": 1.4022824536376604, |
| "step": 1475 |
| }, |
| { |
| "loss": 1.1084, |
| "grad_norm": 0.5268110632896423, |
| "learning_rate": 0.00015431330202973523, |
| "epoch": 1.4070375653827865, |
| "step": 1480 |
| }, |
| { |
| "loss": 1.1065, |
| "grad_norm": 0.5779821872711182, |
| "learning_rate": 0.00015398290750475723, |
| "epoch": 1.4117926771279126, |
| "step": 1485 |
| }, |
| { |
| "loss": 1.0915, |
| "grad_norm": 0.5298230051994324, |
| "learning_rate": 0.00015365167924643607, |
| "epoch": 1.4165477888730384, |
| "step": 1490 |
| }, |
| { |
| "loss": 1.1086, |
| "grad_norm": 0.6477397680282593, |
| "learning_rate": 0.00015331962237039175, |
| "epoch": 1.4213029006181646, |
| "step": 1495 |
| }, |
| { |
| "loss": 1.0774, |
| "grad_norm": 0.6534600853919983, |
| "learning_rate": 0.00015298674200504183, |
| "epoch": 1.4260580123632907, |
| "step": 1500 |
| }, |
| { |
| "loss": 1.0952, |
| "grad_norm": 0.508335530757904, |
| "learning_rate": 0.0001526530432915221, |
| "epoch": 1.4308131241084165, |
| "step": 1505 |
| }, |
| { |
| "loss": 1.1043, |
| "grad_norm": 0.5168479084968567, |
| "learning_rate": 0.0001523185313836073, |
| "epoch": 1.4355682358535424, |
| "step": 1510 |
| }, |
| { |
| "loss": 1.1106, |
| "grad_norm": 0.5259273052215576, |
| "learning_rate": 0.00015198321144763147, |
| "epoch": 1.4403233475986685, |
| "step": 1515 |
| }, |
| { |
| "loss": 1.0758, |
| "grad_norm": 0.5048487186431885, |
| "learning_rate": 0.0001516470886624081, |
| "epoch": 1.4450784593437946, |
| "step": 1520 |
| }, |
| { |
| "loss": 1.1088, |
| "grad_norm": 0.5271756649017334, |
| "learning_rate": 0.00015131016821915024, |
| "epoch": 1.4498335710889205, |
| "step": 1525 |
| }, |
| { |
| "loss": 1.1129, |
| "grad_norm": 0.5347276926040649, |
| "learning_rate": 0.00015097245532139027, |
| "epoch": 1.4545886828340466, |
| "step": 1530 |
| }, |
| { |
| "loss": 1.0962, |
| "grad_norm": 0.543971836566925, |
| "learning_rate": 0.00015063395518489955, |
| "epoch": 1.4593437945791727, |
| "step": 1535 |
| }, |
| { |
| "loss": 1.1221, |
| "grad_norm": 0.5085065960884094, |
| "learning_rate": 0.00015029467303760789, |
| "epoch": 1.4640989063242986, |
| "step": 1540 |
| }, |
| { |
| "loss": 1.0937, |
| "grad_norm": 0.536890983581543, |
| "learning_rate": 0.00014995461411952275, |
| "epoch": 1.4688540180694245, |
| "step": 1545 |
| }, |
| { |
| "loss": 1.1178, |
| "grad_norm": 0.5335246920585632, |
| "learning_rate": 0.0001496137836826484, |
| "epoch": 1.4736091298145506, |
| "step": 1550 |
| }, |
| { |
| "loss": 1.1055, |
| "grad_norm": 0.5516133308410645, |
| "learning_rate": 0.0001492721869909047, |
| "epoch": 1.4783642415596767, |
| "step": 1555 |
| }, |
| { |
| "loss": 1.1153, |
| "grad_norm": 0.5218647718429565, |
| "learning_rate": 0.00014892982932004583, |
| "epoch": 1.4831193533048026, |
| "step": 1560 |
| }, |
| { |
| "loss": 1.0949, |
| "grad_norm": 0.5604883432388306, |
| "learning_rate": 0.0001485867159575789, |
| "epoch": 1.4878744650499287, |
| "step": 1565 |
| }, |
| { |
| "loss": 1.096, |
| "grad_norm": 0.5131722688674927, |
| "learning_rate": 0.00014824285220268218, |
| "epoch": 1.4926295767950548, |
| "step": 1570 |
| }, |
| { |
| "loss": 1.1235, |
| "grad_norm": 0.5781703591346741, |
| "learning_rate": 0.0001478982433661233, |
| "epoch": 1.4973846885401807, |
| "step": 1575 |
| }, |
| { |
| "loss": 1.0891, |
| "grad_norm": 0.6187899112701416, |
| "learning_rate": 0.0001475528947701772, |
| "epoch": 1.5021398002853066, |
| "step": 1580 |
| }, |
| { |
| "loss": 1.1052, |
| "grad_norm": 0.5558789372444153, |
| "learning_rate": 0.00014720681174854405, |
| "epoch": 1.5068949120304327, |
| "step": 1585 |
| }, |
| { |
| "loss": 1.0887, |
| "grad_norm": 0.551230788230896, |
| "learning_rate": 0.0001468599996462667, |
| "epoch": 1.5116500237755588, |
| "step": 1590 |
| }, |
| { |
| "loss": 1.1097, |
| "grad_norm": 0.5479443669319153, |
| "learning_rate": 0.00014651246381964817, |
| "epoch": 1.5164051355206847, |
| "step": 1595 |
| }, |
| { |
| "loss": 1.0955, |
| "grad_norm": 0.5687084197998047, |
| "learning_rate": 0.00014616420963616908, |
| "epoch": 1.5211602472658108, |
| "step": 1600 |
| }, |
| { |
| "loss": 1.0813, |
| "grad_norm": 0.4998055100440979, |
| "learning_rate": 0.00014581524247440455, |
| "epoch": 1.525915359010937, |
| "step": 1605 |
| }, |
| { |
| "loss": 1.1261, |
| "grad_norm": 0.5627532601356506, |
| "learning_rate": 0.00014546556772394124, |
| "epoch": 1.5306704707560628, |
| "step": 1610 |
| }, |
| { |
| "loss": 1.092, |
| "grad_norm": 0.5437434315681458, |
| "learning_rate": 0.0001451151907852941, |
| "epoch": 1.5354255825011887, |
| "step": 1615 |
| }, |
| { |
| "loss": 1.0857, |
| "grad_norm": 0.5644590854644775, |
| "learning_rate": 0.000144764117069823, |
| "epoch": 1.5401806942463148, |
| "step": 1620 |
| }, |
| { |
| "loss": 1.0995, |
| "grad_norm": 0.5443916320800781, |
| "learning_rate": 0.00014441235199964898, |
| "epoch": 1.5449358059914409, |
| "step": 1625 |
| }, |
| { |
| "loss": 1.09, |
| "grad_norm": 0.5086893439292908, |
| "learning_rate": 0.00014405990100757073, |
| "epoch": 1.5496909177365668, |
| "step": 1630 |
| }, |
| { |
| "loss": 1.1141, |
| "grad_norm": 0.524181604385376, |
| "learning_rate": 0.00014370676953698053, |
| "epoch": 1.5544460294816929, |
| "step": 1635 |
| }, |
| { |
| "loss": 1.0735, |
| "grad_norm": 0.5493620038032532, |
| "learning_rate": 0.00014335296304178035, |
| "epoch": 1.559201141226819, |
| "step": 1640 |
| }, |
| { |
| "loss": 1.0994, |
| "grad_norm": 0.5561903715133667, |
| "learning_rate": 0.0001429984869862974, |
| "epoch": 1.5639562529719448, |
| "step": 1645 |
| }, |
| { |
| "loss": 1.0963, |
| "grad_norm": 0.6156582832336426, |
| "learning_rate": 0.00014264334684519986, |
| "epoch": 1.5687113647170707, |
| "step": 1650 |
| }, |
| { |
| "loss": 1.0997, |
| "grad_norm": 0.5500419735908508, |
| "learning_rate": 0.00014228754810341235, |
| "epoch": 1.5734664764621968, |
| "step": 1655 |
| }, |
| { |
| "loss": 1.0841, |
| "grad_norm": 0.5753553509712219, |
| "learning_rate": 0.00014193109625603115, |
| "epoch": 1.578221588207323, |
| "step": 1660 |
| }, |
| { |
| "loss": 1.1004, |
| "grad_norm": 0.5290947556495667, |
| "learning_rate": 0.0001415739968082393, |
| "epoch": 1.5829766999524488, |
| "step": 1665 |
| }, |
| { |
| "loss": 1.0866, |
| "grad_norm": 0.5203518271446228, |
| "learning_rate": 0.00014121625527522183, |
| "epoch": 1.587731811697575, |
| "step": 1670 |
| }, |
| { |
| "loss": 1.1062, |
| "grad_norm": 0.5417327284812927, |
| "learning_rate": 0.00014085787718208016, |
| "epoch": 1.592486923442701, |
| "step": 1675 |
| }, |
| { |
| "loss": 1.0641, |
| "grad_norm": 0.5337457656860352, |
| "learning_rate": 0.00014049886806374713, |
| "epoch": 1.597242035187827, |
| "step": 1680 |
| }, |
| { |
| "loss": 1.087, |
| "grad_norm": 0.5526561737060547, |
| "learning_rate": 0.0001401392334649013, |
| "epoch": 1.6019971469329528, |
| "step": 1685 |
| }, |
| { |
| "loss": 1.0953, |
| "grad_norm": 0.5583302974700928, |
| "learning_rate": 0.00013977897893988157, |
| "epoch": 1.606752258678079, |
| "step": 1690 |
| }, |
| { |
| "loss": 1.1002, |
| "grad_norm": 0.5573065280914307, |
| "learning_rate": 0.000139418110052601, |
| "epoch": 1.611507370423205, |
| "step": 1695 |
| }, |
| { |
| "loss": 1.0955, |
| "grad_norm": 0.5237279534339905, |
| "learning_rate": 0.0001390566323764612, |
| "epoch": 1.616262482168331, |
| "step": 1700 |
| }, |
| { |
| "loss": 1.0811, |
| "grad_norm": 0.5900450348854065, |
| "learning_rate": 0.0001386945514942662, |
| "epoch": 1.621017593913457, |
| "step": 1705 |
| }, |
| { |
| "loss": 1.0878, |
| "grad_norm": 0.5791476964950562, |
| "learning_rate": 0.00013833187299813613, |
| "epoch": 1.6257727056585831, |
| "step": 1710 |
| }, |
| { |
| "loss": 1.0688, |
| "grad_norm": 0.5293108224868774, |
| "learning_rate": 0.00013796860248942093, |
| "epoch": 1.630527817403709, |
| "step": 1715 |
| }, |
| { |
| "loss": 1.0831, |
| "grad_norm": 0.5247389078140259, |
| "learning_rate": 0.0001376047455786138, |
| "epoch": 1.6352829291488349, |
| "step": 1720 |
| }, |
| { |
| "loss": 1.0897, |
| "grad_norm": 0.541092574596405, |
| "learning_rate": 0.00013724030788526452, |
| "epoch": 1.640038040893961, |
| "step": 1725 |
| }, |
| { |
| "loss": 1.1052, |
| "grad_norm": 0.5626693367958069, |
| "learning_rate": 0.00013687529503789283, |
| "epoch": 1.644793152639087, |
| "step": 1730 |
| }, |
| { |
| "loss": 1.0901, |
| "grad_norm": 0.5883006453514099, |
| "learning_rate": 0.00013650971267390127, |
| "epoch": 1.649548264384213, |
| "step": 1735 |
| }, |
| { |
| "loss": 1.0907, |
| "grad_norm": 0.5166460871696472, |
| "learning_rate": 0.00013614356643948825, |
| "epoch": 1.654303376129339, |
| "step": 1740 |
| }, |
| { |
| "loss": 1.0958, |
| "grad_norm": 0.5543771386146545, |
| "learning_rate": 0.0001357768619895608, |
| "epoch": 1.6590584878744652, |
| "step": 1745 |
| }, |
| { |
| "loss": 1.1102, |
| "grad_norm": 0.6001602411270142, |
| "learning_rate": 0.00013540960498764732, |
| "epoch": 1.663813599619591, |
| "step": 1750 |
| }, |
| { |
| "loss": 1.0833, |
| "grad_norm": 0.5161949992179871, |
| "learning_rate": 0.00013504180110580994, |
| "epoch": 1.668568711364717, |
| "step": 1755 |
| }, |
| { |
| "loss": 1.1053, |
| "grad_norm": 0.5537331700325012, |
| "learning_rate": 0.00013467345602455707, |
| "epoch": 1.673323823109843, |
| "step": 1760 |
| }, |
| { |
| "loss": 1.0893, |
| "grad_norm": 0.5251327753067017, |
| "learning_rate": 0.00013430457543275564, |
| "epoch": 1.6780789348549692, |
| "step": 1765 |
| }, |
| { |
| "loss": 1.0749, |
| "grad_norm": 0.5318305492401123, |
| "learning_rate": 0.0001339351650275432, |
| "epoch": 1.682834046600095, |
| "step": 1770 |
| }, |
| { |
| "loss": 1.0848, |
| "grad_norm": 0.5394341945648193, |
| "learning_rate": 0.0001335652305142399, |
| "epoch": 1.6875891583452212, |
| "step": 1775 |
| }, |
| { |
| "loss": 1.0895, |
| "grad_norm": 0.5451623797416687, |
| "learning_rate": 0.0001331947776062605, |
| "epoch": 1.6923442700903473, |
| "step": 1780 |
| }, |
| { |
| "loss": 1.0826, |
| "grad_norm": 0.5241774916648865, |
| "learning_rate": 0.000132823812025026, |
| "epoch": 1.6970993818354732, |
| "step": 1785 |
| }, |
| { |
| "loss": 1.0994, |
| "grad_norm": 0.5546004772186279, |
| "learning_rate": 0.00013245233949987534, |
| "epoch": 1.701854493580599, |
| "step": 1790 |
| }, |
| { |
| "loss": 1.0778, |
| "grad_norm": 0.5437877178192139, |
| "learning_rate": 0.00013208036576797694, |
| "epoch": 1.7066096053257251, |
| "step": 1795 |
| }, |
| { |
| "loss": 1.1106, |
| "grad_norm": 0.5398416519165039, |
| "learning_rate": 0.00013170789657423996, |
| "epoch": 1.7113647170708512, |
| "step": 1800 |
| }, |
| { |
| "loss": 1.094, |
| "grad_norm": 0.5340137481689453, |
| "learning_rate": 0.0001313349376712258, |
| "epoch": 1.7161198288159771, |
| "step": 1805 |
| }, |
| { |
| "loss": 1.1031, |
| "grad_norm": 0.5068842172622681, |
| "learning_rate": 0.00013096149481905899, |
| "epoch": 1.7208749405611032, |
| "step": 1810 |
| }, |
| { |
| "loss": 1.0635, |
| "grad_norm": 0.5284212827682495, |
| "learning_rate": 0.00013058757378533846, |
| "epoch": 1.7256300523062293, |
| "step": 1815 |
| }, |
| { |
| "loss": 1.0667, |
| "grad_norm": 0.5621690154075623, |
| "learning_rate": 0.0001302131803450483, |
| "epoch": 1.7303851640513552, |
| "step": 1820 |
| }, |
| { |
| "loss": 1.1137, |
| "grad_norm": 0.5420382618904114, |
| "learning_rate": 0.00012983832028046872, |
| "epoch": 1.7351402757964811, |
| "step": 1825 |
| }, |
| { |
| "loss": 1.0766, |
| "grad_norm": 0.5706251859664917, |
| "learning_rate": 0.00012946299938108651, |
| "epoch": 1.7398953875416072, |
| "step": 1830 |
| }, |
| { |
| "loss": 1.0827, |
| "grad_norm": 0.5310079455375671, |
| "learning_rate": 0.00012908722344350597, |
| "epoch": 1.7446504992867333, |
| "step": 1835 |
| }, |
| { |
| "loss": 1.0908, |
| "grad_norm": 0.5003242492675781, |
| "learning_rate": 0.00012871099827135903, |
| "epoch": 1.7494056110318592, |
| "step": 1840 |
| }, |
| { |
| "loss": 1.0631, |
| "grad_norm": 0.5313247442245483, |
| "learning_rate": 0.0001283343296752158, |
| "epoch": 1.754160722776985, |
| "step": 1845 |
| }, |
| { |
| "loss": 1.0932, |
| "grad_norm": 0.5090301632881165, |
| "learning_rate": 0.0001279572234724949, |
| "epoch": 1.7589158345221114, |
| "step": 1850 |
| }, |
| { |
| "loss": 1.0877, |
| "grad_norm": 0.5327184796333313, |
| "learning_rate": 0.00012757968548737344, |
| "epoch": 1.7636709462672373, |
| "step": 1855 |
| }, |
| { |
| "loss": 1.0797, |
| "grad_norm": 0.5266241431236267, |
| "learning_rate": 0.00012720172155069717, |
| "epoch": 1.7684260580123632, |
| "step": 1860 |
| }, |
| { |
| "loss": 1.0864, |
| "grad_norm": 0.5285894274711609, |
| "learning_rate": 0.00012682333749989034, |
| "epoch": 1.7731811697574893, |
| "step": 1865 |
| }, |
| { |
| "loss": 1.0799, |
| "grad_norm": 0.538910984992981, |
| "learning_rate": 0.00012644453917886575, |
| "epoch": 1.7779362815026154, |
| "step": 1870 |
| }, |
| { |
| "loss": 1.085, |
| "grad_norm": 0.5298725962638855, |
| "learning_rate": 0.00012606533243793427, |
| "epoch": 1.7826913932477413, |
| "step": 1875 |
| }, |
| { |
| "loss": 1.0886, |
| "grad_norm": 0.5201819539070129, |
| "learning_rate": 0.00012568572313371448, |
| "epoch": 1.7874465049928672, |
| "step": 1880 |
| }, |
| { |
| "loss": 1.0765, |
| "grad_norm": 0.5403315424919128, |
| "learning_rate": 0.00012530571712904244, |
| "epoch": 1.7922016167379935, |
| "step": 1885 |
| }, |
| { |
| "loss": 1.072, |
| "grad_norm": 0.5282072424888611, |
| "learning_rate": 0.00012492532029288102, |
| "epoch": 1.7969567284831194, |
| "step": 1890 |
| }, |
| { |
| "loss": 1.0857, |
| "grad_norm": 0.523082971572876, |
| "learning_rate": 0.00012454453850022911, |
| "epoch": 1.8017118402282453, |
| "step": 1895 |
| }, |
| { |
| "loss": 1.0899, |
| "grad_norm": 0.5342614650726318, |
| "learning_rate": 0.0001241633776320311, |
| "epoch": 1.8064669519733714, |
| "step": 1900 |
| }, |
| { |
| "loss": 1.0652, |
| "grad_norm": 0.6279932856559753, |
| "learning_rate": 0.00012378184357508606, |
| "epoch": 1.8112220637184975, |
| "step": 1905 |
| }, |
| { |
| "loss": 1.0873, |
| "grad_norm": 0.5402302742004395, |
| "learning_rate": 0.0001233999422219566, |
| "epoch": 1.8159771754636234, |
| "step": 1910 |
| }, |
| { |
| "loss": 1.0732, |
| "grad_norm": 0.5644949078559875, |
| "learning_rate": 0.000123017679470878, |
| "epoch": 1.8207322872087492, |
| "step": 1915 |
| }, |
| { |
| "loss": 1.0811, |
| "grad_norm": 0.6532657742500305, |
| "learning_rate": 0.0001226350612256672, |
| "epoch": 1.8254873989538756, |
| "step": 1920 |
| }, |
| { |
| "loss": 1.0652, |
| "grad_norm": 0.5240359306335449, |
| "learning_rate": 0.00012225209339563145, |
| "epoch": 1.8302425106990015, |
| "step": 1925 |
| }, |
| { |
| "loss": 1.0775, |
| "grad_norm": 0.5336308479309082, |
| "learning_rate": 0.00012186878189547722, |
| "epoch": 1.8349976224441273, |
| "step": 1930 |
| }, |
| { |
| "loss": 1.1083, |
| "grad_norm": 0.5513536334037781, |
| "learning_rate": 0.00012148513264521865, |
| "epoch": 1.8397527341892534, |
| "step": 1935 |
| }, |
| { |
| "loss": 1.0842, |
| "grad_norm": 0.518937349319458, |
| "learning_rate": 0.00012110115157008631, |
| "epoch": 1.8445078459343796, |
| "step": 1940 |
| }, |
| { |
| "loss": 1.0776, |
| "grad_norm": 0.5496499538421631, |
| "learning_rate": 0.00012071684460043556, |
| "epoch": 1.8492629576795054, |
| "step": 1945 |
| }, |
| { |
| "loss": 1.0998, |
| "grad_norm": 0.5657519698143005, |
| "learning_rate": 0.00012033221767165503, |
| "epoch": 1.8540180694246313, |
| "step": 1950 |
| }, |
| { |
| "loss": 1.0775, |
| "grad_norm": 0.5270748734474182, |
| "learning_rate": 0.00011994727672407487, |
| "epoch": 1.8587731811697576, |
| "step": 1955 |
| }, |
| { |
| "loss": 1.0669, |
| "grad_norm": 0.5228608846664429, |
| "learning_rate": 0.00011956202770287514, |
| "epoch": 1.8635282929148835, |
| "step": 1960 |
| }, |
| { |
| "loss": 1.0957, |
| "grad_norm": 0.5587995648384094, |
| "learning_rate": 0.00011917647655799387, |
| "epoch": 1.8682834046600094, |
| "step": 1965 |
| }, |
| { |
| "loss": 1.0861, |
| "grad_norm": 0.5360639691352844, |
| "learning_rate": 0.00011879062924403517, |
| "epoch": 1.8730385164051355, |
| "step": 1970 |
| }, |
| { |
| "loss": 1.062, |
| "grad_norm": 0.5234010815620422, |
| "learning_rate": 0.00011840449172017738, |
| "epoch": 1.8777936281502616, |
| "step": 1975 |
| }, |
| { |
| "loss": 1.0645, |
| "grad_norm": 0.5355650782585144, |
| "learning_rate": 0.00011801806995008089, |
| "epoch": 1.8825487398953875, |
| "step": 1980 |
| }, |
| { |
| "loss": 1.09, |
| "grad_norm": 0.5821070075035095, |
| "learning_rate": 0.00011763136990179618, |
| "epoch": 1.8873038516405134, |
| "step": 1985 |
| }, |
| { |
| "loss": 1.0941, |
| "grad_norm": 0.5609695911407471, |
| "learning_rate": 0.00011724439754767145, |
| "epoch": 1.8920589633856397, |
| "step": 1990 |
| }, |
| { |
| "loss": 1.0943, |
| "grad_norm": 0.5467851758003235, |
| "learning_rate": 0.00011685715886426058, |
| "epoch": 1.8968140751307656, |
| "step": 1995 |
| }, |
| { |
| "loss": 1.0777, |
| "grad_norm": 0.5099024176597595, |
| "learning_rate": 0.00011646965983223079, |
| "epoch": 1.9015691868758915, |
| "step": 2000 |
| }, |
| { |
| "loss": 1.0759, |
| "grad_norm": 0.6141824126243591, |
| "learning_rate": 0.00011608190643627011, |
| "epoch": 1.9063242986210176, |
| "step": 2005 |
| }, |
| { |
| "loss": 1.1035, |
| "grad_norm": 0.5201556086540222, |
| "learning_rate": 0.00011569390466499513, |
| "epoch": 1.9110794103661437, |
| "step": 2010 |
| }, |
| { |
| "loss": 1.0698, |
| "grad_norm": 0.5325519442558289, |
| "learning_rate": 0.00011530566051085845, |
| "epoch": 1.9158345221112696, |
| "step": 2015 |
| }, |
| { |
| "loss": 1.076, |
| "grad_norm": 0.5209614038467407, |
| "learning_rate": 0.00011491717997005611, |
| "epoch": 1.9205896338563955, |
| "step": 2020 |
| }, |
| { |
| "loss": 1.1036, |
| "grad_norm": 0.5825982689857483, |
| "learning_rate": 0.00011452846904243502, |
| "epoch": 1.9253447456015218, |
| "step": 2025 |
| }, |
| { |
| "loss": 1.0776, |
| "grad_norm": 0.5414905548095703, |
| "learning_rate": 0.00011413953373140022, |
| "epoch": 1.9300998573466477, |
| "step": 2030 |
| }, |
| { |
| "loss": 1.0617, |
| "grad_norm": 0.5278269648551941, |
| "learning_rate": 0.00011375038004382234, |
| "epoch": 1.9348549690917736, |
| "step": 2035 |
| }, |
| { |
| "loss": 1.0634, |
| "grad_norm": 0.5142972469329834, |
| "learning_rate": 0.00011336101398994457, |
| "epoch": 1.9396100808368997, |
| "step": 2040 |
| }, |
| { |
| "loss": 1.0839, |
| "grad_norm": 0.5105366706848145, |
| "learning_rate": 0.00011297144158328998, |
| "epoch": 1.9443651925820258, |
| "step": 2045 |
| }, |
| { |
| "loss": 1.0987, |
| "grad_norm": 0.5245667695999146, |
| "learning_rate": 0.00011258166884056873, |
| "epoch": 1.9491203043271517, |
| "step": 2050 |
| }, |
| { |
| "loss": 1.0697, |
| "grad_norm": 0.576498806476593, |
| "learning_rate": 0.000112191701781585, |
| "epoch": 1.9538754160722775, |
| "step": 2055 |
| }, |
| { |
| "loss": 1.0534, |
| "grad_norm": 0.5435449481010437, |
| "learning_rate": 0.00011180154642914402, |
| "epoch": 1.9586305278174037, |
| "step": 2060 |
| }, |
| { |
| "loss": 1.0905, |
| "grad_norm": 0.5367630124092102, |
| "learning_rate": 0.00011141120880895917, |
| "epoch": 1.9633856395625298, |
| "step": 2065 |
| }, |
| { |
| "loss": 1.0759, |
| "grad_norm": 0.5463708639144897, |
| "learning_rate": 0.0001110206949495588, |
| "epoch": 1.9681407513076556, |
| "step": 2070 |
| }, |
| { |
| "loss": 1.0864, |
| "grad_norm": 0.5300132632255554, |
| "learning_rate": 0.00011063001088219315, |
| "epoch": 1.9728958630527818, |
| "step": 2075 |
| }, |
| { |
| "loss": 1.0666, |
| "grad_norm": 0.5325156450271606, |
| "learning_rate": 0.0001102391626407413, |
| "epoch": 1.9776509747979079, |
| "step": 2080 |
| }, |
| { |
| "loss": 1.081, |
| "grad_norm": 0.5473135113716125, |
| "learning_rate": 0.00010984815626161784, |
| "epoch": 1.9824060865430337, |
| "step": 2085 |
| }, |
| { |
| "loss": 1.0891, |
| "grad_norm": 0.5334020853042603, |
| "learning_rate": 0.00010945699778367973, |
| "epoch": 1.9871611982881596, |
| "step": 2090 |
| }, |
| { |
| "loss": 1.0723, |
| "grad_norm": 0.5563669204711914, |
| "learning_rate": 0.00010906569324813292, |
| "epoch": 1.9919163100332857, |
| "step": 2095 |
| }, |
| { |
| "loss": 1.0944, |
| "grad_norm": 0.5293501019477844, |
| "learning_rate": 0.00010867424869843925, |
| "epoch": 1.9966714217784118, |
| "step": 2100 |
| }, |
| { |
| "loss": 1.0713, |
| "grad_norm": 0.5161049365997314, |
| "learning_rate": 0.00010828267018022294, |
| "epoch": 2.000951022349025, |
| "step": 2105 |
| }, |
| { |
| "loss": 1.0329, |
| "grad_norm": 0.5773520469665527, |
| "learning_rate": 0.00010789096374117722, |
| "epoch": 2.0057061340941513, |
| "step": 2110 |
| }, |
| { |
| "loss": 1.0286, |
| "grad_norm": 0.5533373355865479, |
| "learning_rate": 0.0001074991354309711, |
| "epoch": 2.010461245839277, |
| "step": 2115 |
| }, |
| { |
| "loss": 1.0195, |
| "grad_norm": 0.5408231616020203, |
| "learning_rate": 0.00010710719130115572, |
| "epoch": 2.015216357584403, |
| "step": 2120 |
| }, |
| { |
| "loss": 1.0351, |
| "grad_norm": 0.5306239724159241, |
| "learning_rate": 0.00010671513740507102, |
| "epoch": 2.0199714693295294, |
| "step": 2125 |
| }, |
| { |
| "loss": 1.0051, |
| "grad_norm": 0.5489319562911987, |
| "learning_rate": 0.00010632297979775219, |
| "epoch": 2.0247265810746553, |
| "step": 2130 |
| }, |
| { |
| "loss": 1.0484, |
| "grad_norm": 0.5381217002868652, |
| "learning_rate": 0.00010593072453583619, |
| "epoch": 2.029481692819781, |
| "step": 2135 |
| }, |
| { |
| "loss": 1.0277, |
| "grad_norm": 0.5207399129867554, |
| "learning_rate": 0.00010553837767746823, |
| "epoch": 2.034236804564907, |
| "step": 2140 |
| }, |
| { |
| "loss": 1.0077, |
| "grad_norm": 0.5270088315010071, |
| "learning_rate": 0.00010514594528220814, |
| "epoch": 2.0389919163100334, |
| "step": 2145 |
| }, |
| { |
| "loss": 1.0337, |
| "grad_norm": 0.5146133303642273, |
| "learning_rate": 0.00010475343341093677, |
| "epoch": 2.0437470280551593, |
| "step": 2150 |
| }, |
| { |
| "loss": 1.0165, |
| "grad_norm": 0.5402041673660278, |
| "learning_rate": 0.00010436084812576253, |
| "epoch": 2.048502139800285, |
| "step": 2155 |
| }, |
| { |
| "loss": 1.0271, |
| "grad_norm": 0.5281833410263062, |
| "learning_rate": 0.0001039681954899276, |
| "epoch": 2.0532572515454115, |
| "step": 2160 |
| }, |
| { |
| "loss": 1.0474, |
| "grad_norm": 0.5398932695388794, |
| "learning_rate": 0.00010357548156771433, |
| "epoch": 2.0580123632905374, |
| "step": 2165 |
| }, |
| { |
| "loss": 1.05, |
| "grad_norm": 0.5282977223396301, |
| "learning_rate": 0.00010318271242435165, |
| "epoch": 2.0627674750356633, |
| "step": 2170 |
| }, |
| { |
| "loss": 1.0254, |
| "grad_norm": 0.5478774309158325, |
| "learning_rate": 0.00010278989412592135, |
| "epoch": 2.067522586780789, |
| "step": 2175 |
| }, |
| { |
| "loss": 1.0302, |
| "grad_norm": 0.5541023015975952, |
| "learning_rate": 0.0001023970327392643, |
| "epoch": 2.0722776985259155, |
| "step": 2180 |
| }, |
| { |
| "loss": 1.042, |
| "grad_norm": 0.5809475183486938, |
| "learning_rate": 0.000102004134331887, |
| "epoch": 2.0770328102710414, |
| "step": 2185 |
| }, |
| { |
| "loss": 1.0243, |
| "grad_norm": 0.5794262886047363, |
| "learning_rate": 0.00010161120497186756, |
| "epoch": 2.0817879220161672, |
| "step": 2190 |
| }, |
| { |
| "loss": 1.0288, |
| "grad_norm": 0.5598430037498474, |
| "learning_rate": 0.00010121825072776221, |
| "epoch": 2.0865430337612936, |
| "step": 2195 |
| }, |
| { |
| "loss": 1.0366, |
| "grad_norm": 0.5539926290512085, |
| "learning_rate": 0.00010082527766851151, |
| "epoch": 2.0912981455064195, |
| "step": 2200 |
| }, |
| { |
| "loss": 1.019, |
| "grad_norm": 0.6631146669387817, |
| "learning_rate": 0.00010043229186334655, |
| "epoch": 2.0960532572515453, |
| "step": 2205 |
| }, |
| { |
| "loss": 1.0429, |
| "grad_norm": 0.5416038632392883, |
| "learning_rate": 0.00010003929938169532, |
| "epoch": 2.1008083689966712, |
| "step": 2210 |
| }, |
| { |
| "loss": 1.0286, |
| "grad_norm": 0.5672272443771362, |
| "learning_rate": 9.964630629308894e-05, |
| "epoch": 2.1055634807417976, |
| "step": 2215 |
| }, |
| { |
| "loss": 1.0255, |
| "grad_norm": 0.5412415862083435, |
| "learning_rate": 9.925331866706784e-05, |
| "epoch": 2.1103185924869234, |
| "step": 2220 |
| }, |
| { |
| "loss": 1.0072, |
| "grad_norm": 0.5488986968994141, |
| "learning_rate": 9.886034257308813e-05, |
| "epoch": 2.1150737042320493, |
| "step": 2225 |
| }, |
| { |
| "loss": 1.0741, |
| "grad_norm": 0.5471671223640442, |
| "learning_rate": 9.846738408042781e-05, |
| "epoch": 2.1198288159771757, |
| "step": 2230 |
| }, |
| { |
| "loss": 1.0285, |
| "grad_norm": 0.5494662523269653, |
| "learning_rate": 9.807444925809306e-05, |
| "epoch": 2.1245839277223015, |
| "step": 2235 |
| }, |
| { |
| "loss": 1.0427, |
| "grad_norm": 0.578397274017334, |
| "learning_rate": 9.768154417472442e-05, |
| "epoch": 2.1293390394674274, |
| "step": 2240 |
| }, |
| { |
| "loss": 1.0188, |
| "grad_norm": 0.5189716815948486, |
| "learning_rate": 9.728867489850317e-05, |
| "epoch": 2.1340941512125533, |
| "step": 2245 |
| }, |
| { |
| "loss": 1.0133, |
| "grad_norm": 0.5365281701087952, |
| "learning_rate": 9.68958474970576e-05, |
| "epoch": 2.1388492629576796, |
| "step": 2250 |
| }, |
| { |
| "loss": 1.0358, |
| "grad_norm": 0.5608997941017151, |
| "learning_rate": 9.65030680373692e-05, |
| "epoch": 2.1436043747028055, |
| "step": 2255 |
| }, |
| { |
| "loss": 1.0231, |
| "grad_norm": 0.5782570838928223, |
| "learning_rate": 9.61103425856791e-05, |
| "epoch": 2.1483594864479314, |
| "step": 2260 |
| }, |
| { |
| "loss": 1.0349, |
| "grad_norm": 0.5359815955162048, |
| "learning_rate": 9.571767720739425e-05, |
| "epoch": 2.1531145981930577, |
| "step": 2265 |
| }, |
| { |
| "loss": 1.0273, |
| "grad_norm": 0.5552694797515869, |
| "learning_rate": 9.532507796699388e-05, |
| "epoch": 2.1578697099381836, |
| "step": 2270 |
| }, |
| { |
| "loss": 1.0431, |
| "grad_norm": 0.5584545135498047, |
| "learning_rate": 9.493255092793564e-05, |
| "epoch": 2.1626248216833095, |
| "step": 2275 |
| }, |
| { |
| "loss": 1.0354, |
| "grad_norm": 0.5398048162460327, |
| "learning_rate": 9.454010215256218e-05, |
| "epoch": 2.1673799334284354, |
| "step": 2280 |
| }, |
| { |
| "loss": 1.0207, |
| "grad_norm": 0.5423071980476379, |
| "learning_rate": 9.41477377020074e-05, |
| "epoch": 2.1721350451735617, |
| "step": 2285 |
| }, |
| { |
| "loss": 1.025, |
| "grad_norm": 0.5626171827316284, |
| "learning_rate": 9.375546363610278e-05, |
| "epoch": 2.1768901569186876, |
| "step": 2290 |
| }, |
| { |
| "loss": 1.0042, |
| "grad_norm": 0.5627864599227905, |
| "learning_rate": 9.336328601328395e-05, |
| "epoch": 2.1816452686638135, |
| "step": 2295 |
| }, |
| { |
| "loss": 1.0344, |
| "grad_norm": 0.5755117535591125, |
| "learning_rate": 9.2971210890497e-05, |
| "epoch": 2.18640038040894, |
| "step": 2300 |
| }, |
| { |
| "loss": 1.0411, |
| "grad_norm": 0.5628820657730103, |
| "learning_rate": 9.257924432310497e-05, |
| "epoch": 2.1911554921540657, |
| "step": 2305 |
| }, |
| { |
| "loss": 1.0188, |
| "grad_norm": 0.565238356590271, |
| "learning_rate": 9.218739236479432e-05, |
| "epoch": 2.1959106038991916, |
| "step": 2310 |
| }, |
| { |
| "loss": 1.0317, |
| "grad_norm": 0.5473445653915405, |
| "learning_rate": 9.179566106748143e-05, |
| "epoch": 2.2006657156443175, |
| "step": 2315 |
| }, |
| { |
| "loss": 1.0188, |
| "grad_norm": 0.5394167900085449, |
| "learning_rate": 9.140405648121922e-05, |
| "epoch": 2.205420827389444, |
| "step": 2320 |
| }, |
| { |
| "loss": 1.0124, |
| "grad_norm": 0.5691614151000977, |
| "learning_rate": 9.101258465410352e-05, |
| "epoch": 2.2101759391345697, |
| "step": 2325 |
| }, |
| { |
| "loss": 1.0248, |
| "grad_norm": 0.5687462687492371, |
| "learning_rate": 9.062125163217985e-05, |
| "epoch": 2.2149310508796956, |
| "step": 2330 |
| }, |
| { |
| "loss": 1.0279, |
| "grad_norm": 0.5405845642089844, |
| "learning_rate": 9.023006345934997e-05, |
| "epoch": 2.219686162624822, |
| "step": 2335 |
| }, |
| { |
| "loss": 1.0491, |
| "grad_norm": 0.5354245901107788, |
| "learning_rate": 8.983902617727852e-05, |
| "epoch": 2.2244412743699478, |
| "step": 2340 |
| }, |
| { |
| "loss": 1.0454, |
| "grad_norm": 0.5539077520370483, |
| "learning_rate": 8.944814582529967e-05, |
| "epoch": 2.2291963861150736, |
| "step": 2345 |
| }, |
| { |
| "loss": 1.0331, |
| "grad_norm": 0.5702153444290161, |
| "learning_rate": 8.9057428440324e-05, |
| "epoch": 2.2339514978601995, |
| "step": 2350 |
| }, |
| { |
| "loss": 1.0403, |
| "grad_norm": 0.5364651679992676, |
| "learning_rate": 8.866688005674511e-05, |
| "epoch": 2.238706609605326, |
| "step": 2355 |
| }, |
| { |
| "loss": 1.0164, |
| "grad_norm": 0.5560674667358398, |
| "learning_rate": 8.827650670634642e-05, |
| "epoch": 2.2434617213504517, |
| "step": 2360 |
| }, |
| { |
| "loss": 1.0203, |
| "grad_norm": 0.5443471074104309, |
| "learning_rate": 8.788631441820818e-05, |
| "epoch": 2.2482168330955776, |
| "step": 2365 |
| }, |
| { |
| "loss": 1.0199, |
| "grad_norm": 0.5728740692138672, |
| "learning_rate": 8.749630921861416e-05, |
| "epoch": 2.252971944840704, |
| "step": 2370 |
| }, |
| { |
| "loss": 1.0321, |
| "grad_norm": 0.5560939311981201, |
| "learning_rate": 8.710649713095872e-05, |
| "epoch": 2.25772705658583, |
| "step": 2375 |
| }, |
| { |
| "loss": 1.0186, |
| "grad_norm": 0.5494734048843384, |
| "learning_rate": 8.671688417565366e-05, |
| "epoch": 2.2624821683309557, |
| "step": 2380 |
| }, |
| { |
| "loss": 1.038, |
| "grad_norm": 0.5482062101364136, |
| "learning_rate": 8.632747637003533e-05, |
| "epoch": 2.2672372800760816, |
| "step": 2385 |
| }, |
| { |
| "loss": 1.0208, |
| "grad_norm": 0.5288659334182739, |
| "learning_rate": 8.59382797282717e-05, |
| "epoch": 2.271992391821208, |
| "step": 2390 |
| }, |
| { |
| "loss": 1.0262, |
| "grad_norm": 0.554524838924408, |
| "learning_rate": 8.554930026126941e-05, |
| "epoch": 2.276747503566334, |
| "step": 2395 |
| }, |
| { |
| "loss": 1.0235, |
| "grad_norm": 0.6119384765625, |
| "learning_rate": 8.516054397658093e-05, |
| "epoch": 2.2815026153114597, |
| "step": 2400 |
| }, |
| { |
| "loss": 1.0211, |
| "grad_norm": 0.5533767342567444, |
| "learning_rate": 8.477201687831189e-05, |
| "epoch": 2.2862577270565856, |
| "step": 2405 |
| }, |
| { |
| "loss": 1.0269, |
| "grad_norm": 0.5291370153427124, |
| "learning_rate": 8.438372496702826e-05, |
| "epoch": 2.291012838801712, |
| "step": 2410 |
| }, |
| { |
| "loss": 1.0244, |
| "grad_norm": 0.5709149241447449, |
| "learning_rate": 8.39956742396636e-05, |
| "epoch": 2.295767950546838, |
| "step": 2415 |
| }, |
| { |
| "loss": 1.034, |
| "grad_norm": 0.5471912622451782, |
| "learning_rate": 8.360787068942663e-05, |
| "epoch": 2.3005230622919637, |
| "step": 2420 |
| }, |
| { |
| "loss": 1.011, |
| "grad_norm": 0.5538129210472107, |
| "learning_rate": 8.322032030570856e-05, |
| "epoch": 2.30527817403709, |
| "step": 2425 |
| }, |
| { |
| "loss": 1.0297, |
| "grad_norm": 0.5439838767051697, |
| "learning_rate": 8.283302907399055e-05, |
| "epoch": 2.310033285782216, |
| "step": 2430 |
| }, |
| { |
| "loss": 1.0308, |
| "grad_norm": 0.5352471470832825, |
| "learning_rate": 8.244600297575134e-05, |
| "epoch": 2.314788397527342, |
| "step": 2435 |
| }, |
| { |
| "loss": 1.0217, |
| "grad_norm": 0.5386447906494141, |
| "learning_rate": 8.205924798837484e-05, |
| "epoch": 2.319543509272468, |
| "step": 2440 |
| }, |
| { |
| "loss": 1.0136, |
| "grad_norm": 0.5460801720619202, |
| "learning_rate": 8.167277008505783e-05, |
| "epoch": 2.324298621017594, |
| "step": 2445 |
| }, |
| { |
| "loss": 1.0208, |
| "grad_norm": 0.5513489842414856, |
| "learning_rate": 8.128657523471765e-05, |
| "epoch": 2.32905373276272, |
| "step": 2450 |
| }, |
| { |
| "loss": 1.0378, |
| "grad_norm": 0.5452367663383484, |
| "learning_rate": 8.090066940190014e-05, |
| "epoch": 2.3338088445078458, |
| "step": 2455 |
| }, |
| { |
| "loss": 1.0164, |
| "grad_norm": 0.5363667011260986, |
| "learning_rate": 8.051505854668733e-05, |
| "epoch": 2.338563956252972, |
| "step": 2460 |
| }, |
| { |
| "loss": 1.0356, |
| "grad_norm": 0.5610619187355042, |
| "learning_rate": 8.012974862460562e-05, |
| "epoch": 2.343319067998098, |
| "step": 2465 |
| }, |
| { |
| "loss": 1.0255, |
| "grad_norm": 0.5659564733505249, |
| "learning_rate": 7.974474558653357e-05, |
| "epoch": 2.348074179743224, |
| "step": 2470 |
| }, |
| { |
| "loss": 1.018, |
| "grad_norm": 0.5721662640571594, |
| "learning_rate": 7.936005537861016e-05, |
| "epoch": 2.3528292914883497, |
| "step": 2475 |
| }, |
| { |
| "loss": 1.023, |
| "grad_norm": 0.5471690893173218, |
| "learning_rate": 7.897568394214292e-05, |
| "epoch": 2.357584403233476, |
| "step": 2480 |
| }, |
| { |
| "loss": 1.0236, |
| "grad_norm": 0.5676785707473755, |
| "learning_rate": 7.859163721351608e-05, |
| "epoch": 2.362339514978602, |
| "step": 2485 |
| }, |
| { |
| "loss": 1.0084, |
| "grad_norm": 0.5501339435577393, |
| "learning_rate": 7.8207921124099e-05, |
| "epoch": 2.367094626723728, |
| "step": 2490 |
| }, |
| { |
| "loss": 1.0068, |
| "grad_norm": 0.527346134185791, |
| "learning_rate": 7.782454160015453e-05, |
| "epoch": 2.371849738468854, |
| "step": 2495 |
| }, |
| { |
| "loss": 1.0191, |
| "grad_norm": 0.5971363186836243, |
| "learning_rate": 7.744150456274742e-05, |
| "epoch": 2.37660485021398, |
| "step": 2500 |
| }, |
| { |
| "loss": 1.0187, |
| "grad_norm": 0.5525999665260315, |
| "learning_rate": 7.705881592765295e-05, |
| "epoch": 2.381359961959106, |
| "step": 2505 |
| }, |
| { |
| "loss": 1.017, |
| "grad_norm": 0.5548810362815857, |
| "learning_rate": 7.667648160526555e-05, |
| "epoch": 2.3861150737042323, |
| "step": 2510 |
| }, |
| { |
| "loss": 1.0251, |
| "grad_norm": 0.561303436756134, |
| "learning_rate": 7.62945075005075e-05, |
| "epoch": 2.390870185449358, |
| "step": 2515 |
| }, |
| { |
| "loss": 1.0238, |
| "grad_norm": 0.585380494594574, |
| "learning_rate": 7.59128995127377e-05, |
| "epoch": 2.395625297194484, |
| "step": 2520 |
| }, |
| { |
| "loss": 1.0465, |
| "grad_norm": 0.5896022319793701, |
| "learning_rate": 7.553166353566064e-05, |
| "epoch": 2.40038040893961, |
| "step": 2525 |
| }, |
| { |
| "loss": 1.0077, |
| "grad_norm": 0.5665650963783264, |
| "learning_rate": 7.515080545723532e-05, |
| "epoch": 2.4051355206847362, |
| "step": 2530 |
| }, |
| { |
| "loss": 1.0438, |
| "grad_norm": 0.5852140784263611, |
| "learning_rate": 7.477033115958432e-05, |
| "epoch": 2.409890632429862, |
| "step": 2535 |
| }, |
| { |
| "loss": 1.0195, |
| "grad_norm": 0.5346912741661072, |
| "learning_rate": 7.439024651890292e-05, |
| "epoch": 2.414645744174988, |
| "step": 2540 |
| }, |
| { |
| "loss": 1.0122, |
| "grad_norm": 0.5390799045562744, |
| "learning_rate": 7.401055740536844e-05, |
| "epoch": 2.419400855920114, |
| "step": 2545 |
| }, |
| { |
| "loss": 1.0139, |
| "grad_norm": 0.5338611602783203, |
| "learning_rate": 7.363126968304955e-05, |
| "epoch": 2.42415596766524, |
| "step": 2550 |
| }, |
| { |
| "loss": 1.0128, |
| "grad_norm": 0.5642942786216736, |
| "learning_rate": 7.325238920981554e-05, |
| "epoch": 2.428911079410366, |
| "step": 2555 |
| }, |
| { |
| "loss": 1.015, |
| "grad_norm": 0.5479723215103149, |
| "learning_rate": 7.287392183724611e-05, |
| "epoch": 2.433666191155492, |
| "step": 2560 |
| }, |
| { |
| "loss": 1.0228, |
| "grad_norm": 0.5540456175804138, |
| "learning_rate": 7.249587341054085e-05, |
| "epoch": 2.4384213029006183, |
| "step": 2565 |
| }, |
| { |
| "loss": 1.0159, |
| "grad_norm": 0.5744130611419678, |
| "learning_rate": 7.211824976842896e-05, |
| "epoch": 2.443176414645744, |
| "step": 2570 |
| }, |
| { |
| "loss": 1.0165, |
| "grad_norm": 0.5684532523155212, |
| "learning_rate": 7.174105674307907e-05, |
| "epoch": 2.44793152639087, |
| "step": 2575 |
| }, |
| { |
| "loss": 1.0001, |
| "grad_norm": 0.564030110836029, |
| "learning_rate": 7.136430016000926e-05, |
| "epoch": 2.4526866381359964, |
| "step": 2580 |
| }, |
| { |
| "loss": 1.0251, |
| "grad_norm": 0.5820625424385071, |
| "learning_rate": 7.098798583799701e-05, |
| "epoch": 2.4574417498811223, |
| "step": 2585 |
| }, |
| { |
| "loss": 1.0137, |
| "grad_norm": 0.5815747976303101, |
| "learning_rate": 7.06121195889893e-05, |
| "epoch": 2.462196861626248, |
| "step": 2590 |
| }, |
| { |
| "loss": 1.0385, |
| "grad_norm": 0.5593292117118835, |
| "learning_rate": 7.023670721801292e-05, |
| "epoch": 2.466951973371374, |
| "step": 2595 |
| }, |
| { |
| "loss": 1.0078, |
| "grad_norm": 0.5584039688110352, |
| "learning_rate": 6.986175452308485e-05, |
| "epoch": 2.4717070851165004, |
| "step": 2600 |
| }, |
| { |
| "loss": 1.0253, |
| "grad_norm": 0.5741851329803467, |
| "learning_rate": 6.94872672951225e-05, |
| "epoch": 2.4764621968616263, |
| "step": 2605 |
| }, |
| { |
| "loss": 1.028, |
| "grad_norm": 0.5736190676689148, |
| "learning_rate": 6.911325131785455e-05, |
| "epoch": 2.481217308606752, |
| "step": 2610 |
| }, |
| { |
| "loss": 1.0253, |
| "grad_norm": 0.5480940937995911, |
| "learning_rate": 6.873971236773149e-05, |
| "epoch": 2.485972420351878, |
| "step": 2615 |
| }, |
| { |
| "loss": 1.0262, |
| "grad_norm": 0.5847823619842529, |
| "learning_rate": 6.836665621383635e-05, |
| "epoch": 2.4907275320970044, |
| "step": 2620 |
| }, |
| { |
| "loss": 1.021, |
| "grad_norm": 0.5426408052444458, |
| "learning_rate": 6.799408861779573e-05, |
| "epoch": 2.4954826438421303, |
| "step": 2625 |
| }, |
| { |
| "loss": 1.0225, |
| "grad_norm": 0.5521904826164246, |
| "learning_rate": 6.762201533369074e-05, |
| "epoch": 2.500237755587256, |
| "step": 2630 |
| }, |
| { |
| "loss": 1.018, |
| "grad_norm": 0.5548136830329895, |
| "learning_rate": 6.725044210796807e-05, |
| "epoch": 2.5049928673323825, |
| "step": 2635 |
| }, |
| { |
| "loss": 1.0111, |
| "grad_norm": 0.5314429402351379, |
| "learning_rate": 6.687937467935141e-05, |
| "epoch": 2.5097479790775084, |
| "step": 2640 |
| }, |
| { |
| "loss": 1.0176, |
| "grad_norm": 0.5654233694076538, |
| "learning_rate": 6.650881877875268e-05, |
| "epoch": 2.5145030908226342, |
| "step": 2645 |
| }, |
| { |
| "loss": 1.008, |
| "grad_norm": 0.5522738695144653, |
| "learning_rate": 6.613878012918351e-05, |
| "epoch": 2.5192582025677606, |
| "step": 2650 |
| }, |
| { |
| "loss": 1.0218, |
| "grad_norm": 0.5323864817619324, |
| "learning_rate": 6.576926444566698e-05, |
| "epoch": 2.5240133143128864, |
| "step": 2655 |
| }, |
| { |
| "loss": 1.0044, |
| "grad_norm": 0.5562161803245544, |
| "learning_rate": 6.540027743514923e-05, |
| "epoch": 2.5287684260580123, |
| "step": 2660 |
| }, |
| { |
| "loss": 0.9987, |
| "grad_norm": 0.5672735571861267, |
| "learning_rate": 6.50318247964114e-05, |
| "epoch": 2.533523537803138, |
| "step": 2665 |
| }, |
| { |
| "loss": 1.0256, |
| "grad_norm": 0.5885368585586548, |
| "learning_rate": 6.466391221998149e-05, |
| "epoch": 2.5382786495482645, |
| "step": 2670 |
| }, |
| { |
| "loss": 1.0026, |
| "grad_norm": 0.5668426156044006, |
| "learning_rate": 6.429654538804668e-05, |
| "epoch": 2.5430337612933904, |
| "step": 2675 |
| }, |
| { |
| "loss": 1.0131, |
| "grad_norm": 0.5882384181022644, |
| "learning_rate": 6.392972997436544e-05, |
| "epoch": 2.5477888730385163, |
| "step": 2680 |
| }, |
| { |
| "loss": 1.0166, |
| "grad_norm": 0.5504544973373413, |
| "learning_rate": 6.356347164417982e-05, |
| "epoch": 2.552543984783642, |
| "step": 2685 |
| }, |
| { |
| "loss": 1.0296, |
| "grad_norm": 0.5450078248977661, |
| "learning_rate": 6.319777605412816e-05, |
| "epoch": 2.5572990965287685, |
| "step": 2690 |
| }, |
| { |
| "loss": 1.0344, |
| "grad_norm": 0.5678418278694153, |
| "learning_rate": 6.283264885215756e-05, |
| "epoch": 2.5620542082738944, |
| "step": 2695 |
| }, |
| { |
| "loss": 1.0156, |
| "grad_norm": 0.5520192384719849, |
| "learning_rate": 6.246809567743676e-05, |
| "epoch": 2.5668093200190203, |
| "step": 2700 |
| }, |
| { |
| "loss": 1.0153, |
| "grad_norm": 0.5562868714332581, |
| "learning_rate": 6.210412216026891e-05, |
| "epoch": 2.5715644317641466, |
| "step": 2705 |
| }, |
| { |
| "loss": 1.0136, |
| "grad_norm": 0.5854929685592651, |
| "learning_rate": 6.17407339220048e-05, |
| "epoch": 2.5763195435092725, |
| "step": 2710 |
| }, |
| { |
| "loss": 1.0307, |
| "grad_norm": 0.5546239614486694, |
| "learning_rate": 6.137793657495587e-05, |
| "epoch": 2.5810746552543984, |
| "step": 2715 |
| }, |
| { |
| "loss": 1.0348, |
| "grad_norm": 0.5063544511795044, |
| "learning_rate": 6.1015735722307666e-05, |
| "epoch": 2.5858297669995247, |
| "step": 2720 |
| }, |
| { |
| "loss": 1.0296, |
| "grad_norm": 0.6425706148147583, |
| "learning_rate": 6.065413695803316e-05, |
| "epoch": 2.5905848787446506, |
| "step": 2725 |
| }, |
| { |
| "loss": 1.0471, |
| "grad_norm": 0.5719513297080994, |
| "learning_rate": 6.0293145866806474e-05, |
| "epoch": 2.5953399904897765, |
| "step": 2730 |
| }, |
| { |
| "loss": 1.0058, |
| "grad_norm": 0.5438857674598694, |
| "learning_rate": 5.9932768023916655e-05, |
| "epoch": 2.6000951022349024, |
| "step": 2735 |
| }, |
| { |
| "loss": 1.0212, |
| "grad_norm": 0.5680578947067261, |
| "learning_rate": 5.957300899518134e-05, |
| "epoch": 2.6048502139800287, |
| "step": 2740 |
| }, |
| { |
| "loss": 1.0377, |
| "grad_norm": 0.544715166091919, |
| "learning_rate": 5.9213874336861094e-05, |
| "epoch": 2.6096053257251546, |
| "step": 2745 |
| }, |
| { |
| "loss": 1.0202, |
| "grad_norm": 0.5508673191070557, |
| "learning_rate": 5.885536959557343e-05, |
| "epoch": 2.6143604374702805, |
| "step": 2750 |
| }, |
| { |
| "loss": 1.0242, |
| "grad_norm": 0.5810195207595825, |
| "learning_rate": 5.849750030820717e-05, |
| "epoch": 2.6191155492154063, |
| "step": 2755 |
| }, |
| { |
| "loss": 1.0311, |
| "grad_norm": 0.5465600490570068, |
| "learning_rate": 5.814027200183685e-05, |
| "epoch": 2.6238706609605327, |
| "step": 2760 |
| }, |
| { |
| "loss": 1.0251, |
| "grad_norm": 0.5547868609428406, |
| "learning_rate": 5.7783690193637586e-05, |
| "epoch": 2.6286257727056586, |
| "step": 2765 |
| }, |
| { |
| "loss": 1.0372, |
| "grad_norm": 0.5794418454170227, |
| "learning_rate": 5.742776039079961e-05, |
| "epoch": 2.6333808844507844, |
| "step": 2770 |
| }, |
| { |
| "loss": 1.0264, |
| "grad_norm": 0.5423664450645447, |
| "learning_rate": 5.7072488090443386e-05, |
| "epoch": 2.6381359961959108, |
| "step": 2775 |
| }, |
| { |
| "loss": 1.015, |
| "grad_norm": 0.5447655916213989, |
| "learning_rate": 5.6717878779534675e-05, |
| "epoch": 2.6428911079410367, |
| "step": 2780 |
| }, |
| { |
| "loss": 0.9949, |
| "grad_norm": 0.5512862801551819, |
| "learning_rate": 5.636393793479961e-05, |
| "epoch": 2.6476462196861625, |
| "step": 2785 |
| }, |
| { |
| "loss": 1.0192, |
| "grad_norm": 0.5737626552581787, |
| "learning_rate": 5.601067102264046e-05, |
| "epoch": 2.652401331431289, |
| "step": 2790 |
| }, |
| { |
| "loss": 1.0196, |
| "grad_norm": 0.5808326601982117, |
| "learning_rate": 5.56580834990509e-05, |
| "epoch": 2.6571564431764148, |
| "step": 2795 |
| }, |
| { |
| "loss": 1.0195, |
| "grad_norm": 0.5680710077285767, |
| "learning_rate": 5.530618080953193e-05, |
| "epoch": 2.6619115549215406, |
| "step": 2800 |
| }, |
| { |
| "loss": 1.0037, |
| "grad_norm": 0.549957275390625, |
| "learning_rate": 5.495496838900763e-05, |
| "epoch": 2.6666666666666665, |
| "step": 2805 |
| }, |
| { |
| "loss": 1.0123, |
| "grad_norm": 0.5589137673377991, |
| "learning_rate": 5.4604451661741354e-05, |
| "epoch": 2.6714217784117924, |
| "step": 2810 |
| }, |
| { |
| "loss": 1.0282, |
| "grad_norm": 0.5467957854270935, |
| "learning_rate": 5.4254636041251886e-05, |
| "epoch": 2.6761768901569187, |
| "step": 2815 |
| }, |
| { |
| "loss": 1.0055, |
| "grad_norm": 0.5696823000907898, |
| "learning_rate": 5.390552693022974e-05, |
| "epoch": 2.6809320019020446, |
| "step": 2820 |
| }, |
| { |
| "loss": 1.032, |
| "grad_norm": 0.5576555728912354, |
| "learning_rate": 5.355712972045399e-05, |
| "epoch": 2.6856871136471705, |
| "step": 2825 |
| }, |
| { |
| "loss": 1.0301, |
| "grad_norm": 0.5574038624763489, |
| "learning_rate": 5.320944979270868e-05, |
| "epoch": 2.690442225392297, |
| "step": 2830 |
| }, |
| { |
| "loss": 1.034, |
| "grad_norm": 0.5673926472663879, |
| "learning_rate": 5.286249251669996e-05, |
| "epoch": 2.6951973371374227, |
| "step": 2835 |
| }, |
| { |
| "loss": 1.0398, |
| "grad_norm": 0.5725728869438171, |
| "learning_rate": 5.2516263250973005e-05, |
| "epoch": 2.6999524488825486, |
| "step": 2840 |
| }, |
| { |
| "loss": 1.0267, |
| "grad_norm": 0.5911303162574768, |
| "learning_rate": 5.217076734282935e-05, |
| "epoch": 2.704707560627675, |
| "step": 2845 |
| }, |
| { |
| "loss": 1.0274, |
| "grad_norm": 0.569084107875824, |
| "learning_rate": 5.1826010128244273e-05, |
| "epoch": 2.709462672372801, |
| "step": 2850 |
| }, |
| { |
| "loss": 1.0291, |
| "grad_norm": 0.5734261870384216, |
| "learning_rate": 5.148199693178428e-05, |
| "epoch": 2.7142177841179267, |
| "step": 2855 |
| }, |
| { |
| "loss": 1.0193, |
| "grad_norm": 0.5936700701713562, |
| "learning_rate": 5.113873306652508e-05, |
| "epoch": 2.718972895863053, |
| "step": 2860 |
| }, |
| { |
| "loss": 1.0273, |
| "grad_norm": 0.5567139387130737, |
| "learning_rate": 5.079622383396938e-05, |
| "epoch": 2.723728007608179, |
| "step": 2865 |
| }, |
| { |
| "loss": 1.016, |
| "grad_norm": 0.5393930673599243, |
| "learning_rate": 5.0454474523965025e-05, |
| "epoch": 2.728483119353305, |
| "step": 2870 |
| }, |
| { |
| "loss": 1.0194, |
| "grad_norm": 0.5699242353439331, |
| "learning_rate": 5.011349041462333e-05, |
| "epoch": 2.7332382310984307, |
| "step": 2875 |
| }, |
| { |
| "loss": 1.031, |
| "grad_norm": 0.565233051776886, |
| "learning_rate": 4.9773276772237554e-05, |
| "epoch": 2.7379933428435566, |
| "step": 2880 |
| }, |
| { |
| "loss": 1.0254, |
| "grad_norm": 0.6722928285598755, |
| "learning_rate": 4.9433838851201574e-05, |
| "epoch": 2.742748454588683, |
| "step": 2885 |
| }, |
| { |
| "loss": 1.0223, |
| "grad_norm": 0.5745609998703003, |
| "learning_rate": 4.909518189392864e-05, |
| "epoch": 2.7475035663338088, |
| "step": 2890 |
| }, |
| { |
| "loss": 1.0099, |
| "grad_norm": 0.5707256197929382, |
| "learning_rate": 4.8757311130770576e-05, |
| "epoch": 2.7522586780789347, |
| "step": 2895 |
| }, |
| { |
| "loss": 1.002, |
| "grad_norm": 0.5639123320579529, |
| "learning_rate": 4.8420231779936885e-05, |
| "epoch": 2.757013789824061, |
| "step": 2900 |
| }, |
| { |
| "loss": 1.0193, |
| "grad_norm": 0.5600976943969727, |
| "learning_rate": 4.808394904741419e-05, |
| "epoch": 2.761768901569187, |
| "step": 2905 |
| }, |
| { |
| "loss": 1.0347, |
| "grad_norm": 0.5333346128463745, |
| "learning_rate": 4.774846812688582e-05, |
| "epoch": 2.7665240133143127, |
| "step": 2910 |
| }, |
| { |
| "loss": 1.0115, |
| "grad_norm": 0.5352510809898376, |
| "learning_rate": 4.741379419965158e-05, |
| "epoch": 2.771279125059439, |
| "step": 2915 |
| }, |
| { |
| "loss": 1.0061, |
| "grad_norm": 0.5762399435043335, |
| "learning_rate": 4.7079932434547855e-05, |
| "epoch": 2.776034236804565, |
| "step": 2920 |
| }, |
| { |
| "loss": 1.0102, |
| "grad_norm": 0.561637282371521, |
| "learning_rate": 4.6746887987867505e-05, |
| "epoch": 2.780789348549691, |
| "step": 2925 |
| }, |
| { |
| "loss": 1.0266, |
| "grad_norm": 0.5563335418701172, |
| "learning_rate": 4.641466600328054e-05, |
| "epoch": 2.785544460294817, |
| "step": 2930 |
| }, |
| { |
| "loss": 1.029, |
| "grad_norm": 0.5671743750572205, |
| "learning_rate": 4.6083271611754476e-05, |
| "epoch": 2.790299572039943, |
| "step": 2935 |
| }, |
| { |
| "loss": 1.0447, |
| "grad_norm": 0.7555620074272156, |
| "learning_rate": 4.575270993147518e-05, |
| "epoch": 2.795054683785069, |
| "step": 2940 |
| }, |
| { |
| "loss": 1.0087, |
| "grad_norm": 0.5559315085411072, |
| "learning_rate": 4.5422986067767817e-05, |
| "epoch": 2.799809795530195, |
| "step": 2945 |
| }, |
| { |
| "loss": 1.0105, |
| "grad_norm": 0.5610739588737488, |
| "learning_rate": 4.5094105113017925e-05, |
| "epoch": 2.8045649072753207, |
| "step": 2950 |
| }, |
| { |
| "loss": 1.0005, |
| "grad_norm": 0.5655112266540527, |
| "learning_rate": 4.476607214659291e-05, |
| "epoch": 2.809320019020447, |
| "step": 2955 |
| }, |
| { |
| "loss": 1.0205, |
| "grad_norm": 0.5416660308837891, |
| "learning_rate": 4.443889223476337e-05, |
| "epoch": 2.814075130765573, |
| "step": 2960 |
| }, |
| { |
| "loss": 1.0055, |
| "grad_norm": 0.5437497496604919, |
| "learning_rate": 4.4112570430625147e-05, |
| "epoch": 2.818830242510699, |
| "step": 2965 |
| }, |
| { |
| "loss": 0.9984, |
| "grad_norm": 0.5660828351974487, |
| "learning_rate": 4.3787111774021014e-05, |
| "epoch": 2.823585354255825, |
| "step": 2970 |
| }, |
| { |
| "loss": 1.014, |
| "grad_norm": 0.5344516634941101, |
| "learning_rate": 4.346252129146303e-05, |
| "epoch": 2.828340466000951, |
| "step": 2975 |
| }, |
| { |
| "loss": 1.0021, |
| "grad_norm": 0.5585253834724426, |
| "learning_rate": 4.31388039960548e-05, |
| "epoch": 2.833095577746077, |
| "step": 2980 |
| }, |
| { |
| "loss": 1.0125, |
| "grad_norm": 0.5848186016082764, |
| "learning_rate": 4.2815964887414063e-05, |
| "epoch": 2.8378506894912032, |
| "step": 2985 |
| }, |
| { |
| "loss": 1.003, |
| "grad_norm": 0.5451860427856445, |
| "learning_rate": 4.249400895159555e-05, |
| "epoch": 2.842605801236329, |
| "step": 2990 |
| }, |
| { |
| "loss": 1.0149, |
| "grad_norm": 0.5858671069145203, |
| "learning_rate": 4.217294116101384e-05, |
| "epoch": 2.847360912981455, |
| "step": 2995 |
| }, |
| { |
| "loss": 1.0179, |
| "grad_norm": 0.5558703541755676, |
| "learning_rate": 4.185276647436669e-05, |
| "epoch": 2.8521160247265813, |
| "step": 3000 |
| }, |
| { |
| "loss": 1.0234, |
| "grad_norm": 0.561964213848114, |
| "learning_rate": 4.153348983655844e-05, |
| "epoch": 2.856871136471707, |
| "step": 3005 |
| }, |
| { |
| "loss": 1.0075, |
| "grad_norm": 0.5718274712562561, |
| "learning_rate": 4.121511617862355e-05, |
| "epoch": 2.861626248216833, |
| "step": 3010 |
| }, |
| { |
| "loss": 1.0199, |
| "grad_norm": 0.5498273968696594, |
| "learning_rate": 4.089765041765056e-05, |
| "epoch": 2.866381359961959, |
| "step": 3015 |
| }, |
| { |
| "loss": 1.0298, |
| "grad_norm": 0.5468445420265198, |
| "learning_rate": 4.058109745670603e-05, |
| "epoch": 2.871136471707085, |
| "step": 3020 |
| }, |
| { |
| "loss": 1.0082, |
| "grad_norm": 0.5609626770019531, |
| "learning_rate": 4.026546218475896e-05, |
| "epoch": 2.875891583452211, |
| "step": 3025 |
| }, |
| { |
| "loss": 0.9981, |
| "grad_norm": 0.5522668957710266, |
| "learning_rate": 3.9950749476605074e-05, |
| "epoch": 2.880646695197337, |
| "step": 3030 |
| }, |
| { |
| "loss": 1.0134, |
| "grad_norm": 0.5732723474502563, |
| "learning_rate": 3.963696419279175e-05, |
| "epoch": 2.885401806942463, |
| "step": 3035 |
| }, |
| { |
| "loss": 1.0132, |
| "grad_norm": 0.5638845562934875, |
| "learning_rate": 3.932411117954286e-05, |
| "epoch": 2.8901569186875893, |
| "step": 3040 |
| }, |
| { |
| "loss": 1.0001, |
| "grad_norm": 0.5500783920288086, |
| "learning_rate": 3.901219526868387e-05, |
| "epoch": 2.894912030432715, |
| "step": 3045 |
| }, |
| { |
| "loss": 0.9974, |
| "grad_norm": 0.5504855513572693, |
| "learning_rate": 3.87012212775673e-05, |
| "epoch": 2.899667142177841, |
| "step": 3050 |
| }, |
| { |
| "loss": 0.9985, |
| "grad_norm": 0.5669914484024048, |
| "learning_rate": 3.83911940089983e-05, |
| "epoch": 2.9044222539229674, |
| "step": 3055 |
| }, |
| { |
| "loss": 1.0033, |
| "grad_norm": 0.5380266308784485, |
| "learning_rate": 3.808211825116047e-05, |
| "epoch": 2.9091773656680933, |
| "step": 3060 |
| }, |
| { |
| "loss": 1.0094, |
| "grad_norm": 0.5351689457893372, |
| "learning_rate": 3.77739987775418e-05, |
| "epoch": 2.913932477413219, |
| "step": 3065 |
| }, |
| { |
| "loss": 1.0049, |
| "grad_norm": 0.5727502107620239, |
| "learning_rate": 3.746684034686118e-05, |
| "epoch": 2.9186875891583455, |
| "step": 3070 |
| }, |
| { |
| "loss": 1.0105, |
| "grad_norm": 0.5672404766082764, |
| "learning_rate": 3.71606477029947e-05, |
| "epoch": 2.9234427009034714, |
| "step": 3075 |
| }, |
| { |
| "loss": 1.009, |
| "grad_norm": 0.5589544773101807, |
| "learning_rate": 3.685542557490249e-05, |
| "epoch": 2.9281978126485972, |
| "step": 3080 |
| }, |
| { |
| "loss": 1.0215, |
| "grad_norm": 0.5531269907951355, |
| "learning_rate": 3.6551178676555644e-05, |
| "epoch": 2.932952924393723, |
| "step": 3085 |
| }, |
| { |
| "loss": 1.0039, |
| "grad_norm": 0.541460394859314, |
| "learning_rate": 3.62479117068634e-05, |
| "epoch": 2.937708036138849, |
| "step": 3090 |
| }, |
| { |
| "loss": 1.0068, |
| "grad_norm": 0.5556807518005371, |
| "learning_rate": 3.594562934960066e-05, |
| "epoch": 2.9424631478839753, |
| "step": 3095 |
| }, |
| { |
| "loss": 1.0126, |
| "grad_norm": 0.5401962995529175, |
| "learning_rate": 3.564433627333546e-05, |
| "epoch": 2.9472182596291012, |
| "step": 3100 |
| }, |
| { |
| "loss": 1.0239, |
| "grad_norm": 0.5615110397338867, |
| "learning_rate": 3.534403713135709e-05, |
| "epoch": 2.951973371374227, |
| "step": 3105 |
| }, |
| { |
| "loss": 1.0213, |
| "grad_norm": 0.5481522083282471, |
| "learning_rate": 3.50447365616041e-05, |
| "epoch": 2.9567284831193534, |
| "step": 3110 |
| }, |
| { |
| "loss": 1.0212, |
| "grad_norm": 0.5491989850997925, |
| "learning_rate": 3.474643918659268e-05, |
| "epoch": 2.9614835948644793, |
| "step": 3115 |
| }, |
| { |
| "loss": 1.0281, |
| "grad_norm": 0.5463298559188843, |
| "learning_rate": 3.444914961334532e-05, |
| "epoch": 2.966238706609605, |
| "step": 3120 |
| }, |
| { |
| "loss": 1.0119, |
| "grad_norm": 0.548101544380188, |
| "learning_rate": 3.4152872433319585e-05, |
| "epoch": 2.9709938183547315, |
| "step": 3125 |
| }, |
| { |
| "loss": 1.0041, |
| "grad_norm": 0.5650261044502258, |
| "learning_rate": 3.3857612222337284e-05, |
| "epoch": 2.9757489300998574, |
| "step": 3130 |
| }, |
| { |
| "loss": 1.034, |
| "grad_norm": 0.5379553437232971, |
| "learning_rate": 3.356337354051366e-05, |
| "epoch": 2.9805040418449833, |
| "step": 3135 |
| }, |
| { |
| "loss": 1.0135, |
| "grad_norm": 0.5453470349311829, |
| "learning_rate": 3.3270160932187164e-05, |
| "epoch": 2.9852591535901096, |
| "step": 3140 |
| }, |
| { |
| "loss": 1.0161, |
| "grad_norm": 0.5555306077003479, |
| "learning_rate": 3.297797892584912e-05, |
| "epoch": 2.9900142653352355, |
| "step": 3145 |
| }, |
| { |
| "loss": 1.0058, |
| "grad_norm": 0.5535528659820557, |
| "learning_rate": 3.26868320340738e-05, |
| "epoch": 2.9947693770803614, |
| "step": 3150 |
| }, |
| { |
| "loss": 1.0126, |
| "grad_norm": 0.5523269772529602, |
| "learning_rate": 3.239672475344884e-05, |
| "epoch": 2.9995244888254873, |
| "step": 3155 |
| }, |
| { |
| "loss": 1.0185, |
| "grad_norm": 0.5492323637008667, |
| "learning_rate": 3.2107661564505586e-05, |
| "epoch": 3.003804089396101, |
| "step": 3160 |
| }, |
| { |
| "loss": 0.9936, |
| "grad_norm": 0.5420585870742798, |
| "learning_rate": 3.181964693165012e-05, |
| "epoch": 3.0085592011412268, |
| "step": 3165 |
| }, |
| { |
| "loss": 0.9742, |
| "grad_norm": 0.5570741295814514, |
| "learning_rate": 3.153268530309417e-05, |
| "epoch": 3.0133143128863527, |
| "step": 3170 |
| }, |
| { |
| "loss": 1.001, |
| "grad_norm": 0.5522676110267639, |
| "learning_rate": 3.12467811107865e-05, |
| "epoch": 3.018069424631479, |
| "step": 3175 |
| }, |
| { |
| "loss": 0.9852, |
| "grad_norm": 0.5823954939842224, |
| "learning_rate": 3.09619387703443e-05, |
| "epoch": 3.022824536376605, |
| "step": 3180 |
| }, |
| { |
| "loss": 0.9782, |
| "grad_norm": 0.5400838851928711, |
| "learning_rate": 3.067816268098521e-05, |
| "epoch": 3.0275796481217307, |
| "step": 3185 |
| }, |
| { |
| "loss": 0.9775, |
| "grad_norm": 0.5473350286483765, |
| "learning_rate": 3.039545722545922e-05, |
| "epoch": 3.032334759866857, |
| "step": 3190 |
| }, |
| { |
| "loss": 0.9809, |
| "grad_norm": 0.5682083964347839, |
| "learning_rate": 3.0113826769981048e-05, |
| "epoch": 3.037089871611983, |
| "step": 3195 |
| }, |
| { |
| "loss": 0.9775, |
| "grad_norm": 0.5388854742050171, |
| "learning_rate": 2.9833275664162653e-05, |
| "epoch": 3.041844983357109, |
| "step": 3200 |
| }, |
| { |
| "loss": 0.9844, |
| "grad_norm": 0.5479483604431152, |
| "learning_rate": 2.9553808240946125e-05, |
| "epoch": 3.0466000951022347, |
| "step": 3205 |
| }, |
| { |
| "loss": 0.9716, |
| "grad_norm": 0.6935160160064697, |
| "learning_rate": 2.9275428816536743e-05, |
| "epoch": 3.051355206847361, |
| "step": 3210 |
| }, |
| { |
| "loss": 0.9683, |
| "grad_norm": 0.5467348694801331, |
| "learning_rate": 2.899814169033622e-05, |
| "epoch": 3.056110318592487, |
| "step": 3215 |
| }, |
| { |
| "loss": 0.9788, |
| "grad_norm": 0.5488168001174927, |
| "learning_rate": 2.872195114487649e-05, |
| "epoch": 3.060865430337613, |
| "step": 3220 |
| }, |
| { |
| "loss": 1.019, |
| "grad_norm": 0.589457631111145, |
| "learning_rate": 2.8446861445753393e-05, |
| "epoch": 3.065620542082739, |
| "step": 3225 |
| }, |
| { |
| "loss": 0.991, |
| "grad_norm": 0.5361592173576355, |
| "learning_rate": 2.817287684156088e-05, |
| "epoch": 3.070375653827865, |
| "step": 3230 |
| }, |
| { |
| "loss": 0.9776, |
| "grad_norm": 0.5364184379577637, |
| "learning_rate": 2.7900001563825408e-05, |
| "epoch": 3.075130765572991, |
| "step": 3235 |
| }, |
| { |
| "loss": 0.9776, |
| "grad_norm": 0.5550234317779541, |
| "learning_rate": 2.7628239826940517e-05, |
| "epoch": 3.079885877318117, |
| "step": 3240 |
| }, |
| { |
| "loss": 0.9479, |
| "grad_norm": 0.5656265020370483, |
| "learning_rate": 2.7357595828101836e-05, |
| "epoch": 3.084640989063243, |
| "step": 3245 |
| }, |
| { |
| "loss": 0.99, |
| "grad_norm": 0.5605530738830566, |
| "learning_rate": 2.7088073747242104e-05, |
| "epoch": 3.089396100808369, |
| "step": 3250 |
| }, |
| { |
| "loss": 0.9862, |
| "grad_norm": 0.5612255334854126, |
| "learning_rate": 2.681967774696682e-05, |
| "epoch": 3.094151212553495, |
| "step": 3255 |
| }, |
| { |
| "loss": 0.9781, |
| "grad_norm": 0.5718798041343689, |
| "learning_rate": 2.655241197248981e-05, |
| "epoch": 3.0989063242986212, |
| "step": 3260 |
| }, |
| { |
| "loss": 0.9597, |
| "grad_norm": 0.571089506149292, |
| "learning_rate": 2.6286280551569232e-05, |
| "epoch": 3.103661436043747, |
| "step": 3265 |
| }, |
| { |
| "loss": 0.9658, |
| "grad_norm": 0.5225185751914978, |
| "learning_rate": 2.6021287594443865e-05, |
| "epoch": 3.108416547788873, |
| "step": 3270 |
| }, |
| { |
| "loss": 0.9879, |
| "grad_norm": 0.6016312837600708, |
| "learning_rate": 2.5757437193769608e-05, |
| "epoch": 3.113171659533999, |
| "step": 3275 |
| }, |
| { |
| "loss": 0.9937, |
| "grad_norm": 0.5616617202758789, |
| "learning_rate": 2.549473342455626e-05, |
| "epoch": 3.117926771279125, |
| "step": 3280 |
| }, |
| { |
| "loss": 0.9769, |
| "grad_norm": 0.5490360260009766, |
| "learning_rate": 2.5233180344104536e-05, |
| "epoch": 3.122681883024251, |
| "step": 3285 |
| }, |
| { |
| "loss": 0.9812, |
| "grad_norm": 0.5437697768211365, |
| "learning_rate": 2.4972781991943527e-05, |
| "epoch": 3.127436994769377, |
| "step": 3290 |
| }, |
| { |
| "loss": 0.9713, |
| "grad_norm": 0.5772987604141235, |
| "learning_rate": 2.471354238976823e-05, |
| "epoch": 3.132192106514503, |
| "step": 3295 |
| }, |
| { |
| "loss": 0.974, |
| "grad_norm": 0.5646182298660278, |
| "learning_rate": 2.4455465541377443e-05, |
| "epoch": 3.136947218259629, |
| "step": 3300 |
| }, |
| { |
| "loss": 0.9693, |
| "grad_norm": 0.5731268525123596, |
| "learning_rate": 2.4198555432611947e-05, |
| "epoch": 3.141702330004755, |
| "step": 3305 |
| }, |
| { |
| "loss": 0.9898, |
| "grad_norm": 0.5700622200965881, |
| "learning_rate": 2.39428160312929e-05, |
| "epoch": 3.146457441749881, |
| "step": 3310 |
| }, |
| { |
| "loss": 0.9444, |
| "grad_norm": 0.5610051155090332, |
| "learning_rate": 2.3688251287160667e-05, |
| "epoch": 3.1512125534950073, |
| "step": 3315 |
| }, |
| { |
| "loss": 0.9856, |
| "grad_norm": 0.56795734167099, |
| "learning_rate": 2.3434865131813633e-05, |
| "epoch": 3.155967665240133, |
| "step": 3320 |
| }, |
| { |
| "loss": 0.9879, |
| "grad_norm": 0.5798495411872864, |
| "learning_rate": 2.318266147864766e-05, |
| "epoch": 3.160722776985259, |
| "step": 3325 |
| }, |
| { |
| "loss": 0.9662, |
| "grad_norm": 0.5680422782897949, |
| "learning_rate": 2.293164422279559e-05, |
| "epoch": 3.1654778887303854, |
| "step": 3330 |
| }, |
| { |
| "loss": 0.9694, |
| "grad_norm": 0.5263216495513916, |
| "learning_rate": 2.268181724106706e-05, |
| "epoch": 3.1702330004755113, |
| "step": 3335 |
| }, |
| { |
| "loss": 0.9925, |
| "grad_norm": 0.5480270385742188, |
| "learning_rate": 2.243318439188863e-05, |
| "epoch": 3.174988112220637, |
| "step": 3340 |
| }, |
| { |
| "loss": 0.9828, |
| "grad_norm": 0.5666699409484863, |
| "learning_rate": 2.2185749515244224e-05, |
| "epoch": 3.179743223965763, |
| "step": 3345 |
| }, |
| { |
| "loss": 0.9752, |
| "grad_norm": 0.5875964164733887, |
| "learning_rate": 2.1939516432615816e-05, |
| "epoch": 3.1844983357108894, |
| "step": 3350 |
| }, |
| { |
| "loss": 0.9701, |
| "grad_norm": 0.5586257576942444, |
| "learning_rate": 2.169448894692433e-05, |
| "epoch": 3.1892534474560152, |
| "step": 3355 |
| }, |
| { |
| "loss": 0.9958, |
| "grad_norm": 0.5468137264251709, |
| "learning_rate": 2.1450670842471054e-05, |
| "epoch": 3.194008559201141, |
| "step": 3360 |
| }, |
| { |
| "loss": 1.0042, |
| "grad_norm": 0.5652130246162415, |
| "learning_rate": 2.1208065884879093e-05, |
| "epoch": 3.198763670946267, |
| "step": 3365 |
| }, |
| { |
| "loss": 0.9578, |
| "grad_norm": 0.5776921510696411, |
| "learning_rate": 2.0966677821035196e-05, |
| "epoch": 3.2035187826913933, |
| "step": 3370 |
| }, |
| { |
| "loss": 0.9876, |
| "grad_norm": 0.6246646642684937, |
| "learning_rate": 2.0726510379031993e-05, |
| "epoch": 3.2082738944365192, |
| "step": 3375 |
| }, |
| { |
| "loss": 0.9842, |
| "grad_norm": 0.5602626800537109, |
| "learning_rate": 2.0487567268110275e-05, |
| "epoch": 3.213029006181645, |
| "step": 3380 |
| }, |
| { |
| "loss": 0.982, |
| "grad_norm": 0.5294948220252991, |
| "learning_rate": 2.0249852178601857e-05, |
| "epoch": 3.2177841179267714, |
| "step": 3385 |
| }, |
| { |
| "loss": 0.9802, |
| "grad_norm": 0.5567699074745178, |
| "learning_rate": 2.001336878187241e-05, |
| "epoch": 3.2225392296718973, |
| "step": 3390 |
| }, |
| { |
| "loss": 0.9656, |
| "grad_norm": 0.5467512607574463, |
| "learning_rate": 1.9778120730264937e-05, |
| "epoch": 3.227294341417023, |
| "step": 3395 |
| }, |
| { |
| "loss": 0.9659, |
| "grad_norm": 0.5728263854980469, |
| "learning_rate": 1.9544111657043242e-05, |
| "epoch": 3.2320494531621495, |
| "step": 3400 |
| }, |
| { |
| "loss": 0.9767, |
| "grad_norm": 0.561977744102478, |
| "learning_rate": 1.9311345176335884e-05, |
| "epoch": 3.2368045649072754, |
| "step": 3405 |
| }, |
| { |
| "loss": 0.983, |
| "grad_norm": 0.5865560173988342, |
| "learning_rate": 1.90798248830803e-05, |
| "epoch": 3.2415596766524013, |
| "step": 3410 |
| }, |
| { |
| "loss": 0.9599, |
| "grad_norm": 0.5565074682235718, |
| "learning_rate": 1.8849554352967358e-05, |
| "epoch": 3.246314788397527, |
| "step": 3415 |
| }, |
| { |
| "loss": 0.9654, |
| "grad_norm": 0.554646909236908, |
| "learning_rate": 1.8620537142386073e-05, |
| "epoch": 3.2510699001426535, |
| "step": 3420 |
| }, |
| { |
| "loss": 0.9853, |
| "grad_norm": 0.5781422257423401, |
| "learning_rate": 1.839277678836866e-05, |
| "epoch": 3.2558250118877794, |
| "step": 3425 |
| }, |
| { |
| "loss": 0.9629, |
| "grad_norm": 0.5559192299842834, |
| "learning_rate": 1.8166276808535974e-05, |
| "epoch": 3.2605801236329053, |
| "step": 3430 |
| }, |
| { |
| "loss": 0.9648, |
| "grad_norm": 0.5655896067619324, |
| "learning_rate": 1.794104070104319e-05, |
| "epoch": 3.265335235378031, |
| "step": 3435 |
| }, |
| { |
| "loss": 0.9849, |
| "grad_norm": 0.5561752915382385, |
| "learning_rate": 1.7717071944525675e-05, |
| "epoch": 3.2700903471231575, |
| "step": 3440 |
| }, |
| { |
| "loss": 0.9844, |
| "grad_norm": 0.5643670558929443, |
| "learning_rate": 1.7494373998045365e-05, |
| "epoch": 3.2748454588682834, |
| "step": 3445 |
| }, |
| { |
| "loss": 0.987, |
| "grad_norm": 0.547095000743866, |
| "learning_rate": 1.7272950301037317e-05, |
| "epoch": 3.2796005706134093, |
| "step": 3450 |
| }, |
| { |
| "loss": 0.9865, |
| "grad_norm": 0.5514348149299622, |
| "learning_rate": 1.705280427325656e-05, |
| "epoch": 3.2843556823585356, |
| "step": 3455 |
| }, |
| { |
| "loss": 0.972, |
| "grad_norm": 0.5530180335044861, |
| "learning_rate": 1.6833939314725244e-05, |
| "epoch": 3.2891107941036615, |
| "step": 3460 |
| }, |
| { |
| "loss": 0.9729, |
| "grad_norm": 0.5614736676216125, |
| "learning_rate": 1.6616358805680254e-05, |
| "epoch": 3.2938659058487874, |
| "step": 3465 |
| }, |
| { |
| "loss": 0.982, |
| "grad_norm": 0.5563075542449951, |
| "learning_rate": 1.6400066106520894e-05, |
| "epoch": 3.2986210175939137, |
| "step": 3470 |
| }, |
| { |
| "loss": 0.9783, |
| "grad_norm": 0.5671818256378174, |
| "learning_rate": 1.6185064557757047e-05, |
| "epoch": 3.3033761293390396, |
| "step": 3475 |
| }, |
| { |
| "loss": 0.9607, |
| "grad_norm": 0.5554865598678589, |
| "learning_rate": 1.5971357479957528e-05, |
| "epoch": 3.3081312410841655, |
| "step": 3480 |
| }, |
| { |
| "loss": 0.9712, |
| "grad_norm": 0.562767744064331, |
| "learning_rate": 1.575894817369885e-05, |
| "epoch": 3.3128863528292913, |
| "step": 3485 |
| }, |
| { |
| "loss": 0.9795, |
| "grad_norm": 0.5692958831787109, |
| "learning_rate": 1.5547839919514262e-05, |
| "epoch": 3.3176414645744177, |
| "step": 3490 |
| }, |
| { |
| "loss": 0.9735, |
| "grad_norm": 0.6149819493293762, |
| "learning_rate": 1.5338035977842946e-05, |
| "epoch": 3.3223965763195435, |
| "step": 3495 |
| }, |
| { |
| "loss": 0.9733, |
| "grad_norm": 0.564020037651062, |
| "learning_rate": 1.5129539588979879e-05, |
| "epoch": 3.3271516880646694, |
| "step": 3500 |
| }, |
| { |
| "loss": 0.9666, |
| "grad_norm": 0.5903013348579407, |
| "learning_rate": 1.4922353973025626e-05, |
| "epoch": 3.3319067998097953, |
| "step": 3505 |
| }, |
| { |
| "loss": 0.951, |
| "grad_norm": 0.5840398073196411, |
| "learning_rate": 1.4716482329836667e-05, |
| "epoch": 3.3366619115549216, |
| "step": 3510 |
| }, |
| { |
| "loss": 0.971, |
| "grad_norm": 0.5739172697067261, |
| "learning_rate": 1.4511927838975958e-05, |
| "epoch": 3.3414170233000475, |
| "step": 3515 |
| }, |
| { |
| "loss": 0.995, |
| "grad_norm": 0.5700482130050659, |
| "learning_rate": 1.4308693659663842e-05, |
| "epoch": 3.3461721350451734, |
| "step": 3520 |
| }, |
| { |
| "loss": 0.9759, |
| "grad_norm": 0.5534051060676575, |
| "learning_rate": 1.4106782930729278e-05, |
| "epoch": 3.3509272467902997, |
| "step": 3525 |
| }, |
| { |
| "loss": 0.9567, |
| "grad_norm": 0.5564413070678711, |
| "learning_rate": 1.3906198770561263e-05, |
| "epoch": 3.3556823585354256, |
| "step": 3530 |
| }, |
| { |
| "loss": 0.9823, |
| "grad_norm": 0.9785653352737427, |
| "learning_rate": 1.3706944277060795e-05, |
| "epoch": 3.3604374702805515, |
| "step": 3535 |
| }, |
| { |
| "loss": 1.0064, |
| "grad_norm": 0.56400066614151, |
| "learning_rate": 1.3509022527592975e-05, |
| "epoch": 3.365192582025678, |
| "step": 3540 |
| }, |
| { |
| "loss": 1.001, |
| "grad_norm": 0.56412672996521, |
| "learning_rate": 1.3312436578939457e-05, |
| "epoch": 3.3699476937708037, |
| "step": 3545 |
| }, |
| { |
| "loss": 0.9743, |
| "grad_norm": 0.548657238483429, |
| "learning_rate": 1.3117189467251279e-05, |
| "epoch": 3.3747028055159296, |
| "step": 3550 |
| }, |
| { |
| "loss": 0.9724, |
| "grad_norm": 0.5680193901062012, |
| "learning_rate": 1.292328420800194e-05, |
| "epoch": 3.3794579172610555, |
| "step": 3555 |
| }, |
| { |
| "loss": 0.9789, |
| "grad_norm": 0.5620667934417725, |
| "learning_rate": 1.2730723795940836e-05, |
| "epoch": 3.384213029006182, |
| "step": 3560 |
| }, |
| { |
| "loss": 0.9875, |
| "grad_norm": 0.5768178701400757, |
| "learning_rate": 1.253951120504706e-05, |
| "epoch": 3.3889681407513077, |
| "step": 3565 |
| }, |
| { |
| "loss": 0.9796, |
| "grad_norm": 0.5607120990753174, |
| "learning_rate": 1.234964938848332e-05, |
| "epoch": 3.3937232524964336, |
| "step": 3570 |
| }, |
| { |
| "loss": 0.9866, |
| "grad_norm": 0.5664351582527161, |
| "learning_rate": 1.2161141278550525e-05, |
| "epoch": 3.3984783642415595, |
| "step": 3575 |
| }, |
| { |
| "loss": 0.9838, |
| "grad_norm": 0.5523473620414734, |
| "learning_rate": 1.197398978664238e-05, |
| "epoch": 3.403233475986686, |
| "step": 3580 |
| }, |
| { |
| "loss": 0.9585, |
| "grad_norm": 0.5521702170372009, |
| "learning_rate": 1.1788197803200419e-05, |
| "epoch": 3.4079885877318117, |
| "step": 3585 |
| }, |
| { |
| "loss": 0.9582, |
| "grad_norm": 0.532323956489563, |
| "learning_rate": 1.1603768197669429e-05, |
| "epoch": 3.4127436994769376, |
| "step": 3590 |
| }, |
| { |
| "loss": 0.9788, |
| "grad_norm": 0.5814311504364014, |
| "learning_rate": 1.142070381845306e-05, |
| "epoch": 3.417498811222064, |
| "step": 3595 |
| }, |
| { |
| "loss": 0.9679, |
| "grad_norm": 0.5709570646286011, |
| "learning_rate": 1.1239007492869913e-05, |
| "epoch": 3.4222539229671898, |
| "step": 3600 |
| }, |
| { |
| "loss": 0.9966, |
| "grad_norm": 0.5555875301361084, |
| "learning_rate": 1.1058682027109757e-05, |
| "epoch": 3.4270090347123157, |
| "step": 3605 |
| }, |
| { |
| "loss": 0.9827, |
| "grad_norm": 0.575583279132843, |
| "learning_rate": 1.0879730206190319e-05, |
| "epoch": 3.431764146457442, |
| "step": 3610 |
| }, |
| { |
| "loss": 0.9743, |
| "grad_norm": 0.5674506425857544, |
| "learning_rate": 1.0702154793914198e-05, |
| "epoch": 3.436519258202568, |
| "step": 3615 |
| }, |
| { |
| "loss": 0.9782, |
| "grad_norm": 0.5540629029273987, |
| "learning_rate": 1.0525958532826196e-05, |
| "epoch": 3.4412743699476938, |
| "step": 3620 |
| }, |
| { |
| "loss": 0.9835, |
| "grad_norm": 0.5674764513969421, |
| "learning_rate": 1.0351144144170944e-05, |
| "epoch": 3.4460294816928196, |
| "step": 3625 |
| }, |
| { |
| "loss": 0.9923, |
| "grad_norm": 0.5540902018547058, |
| "learning_rate": 1.0177714327850906e-05, |
| "epoch": 3.450784593437946, |
| "step": 3630 |
| }, |
| { |
| "loss": 0.9718, |
| "grad_norm": 0.5495504140853882, |
| "learning_rate": 1.0005671762384671e-05, |
| "epoch": 3.455539705183072, |
| "step": 3635 |
| }, |
| { |
| "loss": 0.9697, |
| "grad_norm": 0.5659884810447693, |
| "learning_rate": 9.83501910486555e-06, |
| "epoch": 3.4602948169281977, |
| "step": 3640 |
| }, |
| { |
| "loss": 0.9765, |
| "grad_norm": 0.5598602890968323, |
| "learning_rate": 9.66575899092057e-06, |
| "epoch": 3.4650499286733236, |
| "step": 3645 |
| }, |
| { |
| "loss": 0.9763, |
| "grad_norm": 0.5722287893295288, |
| "learning_rate": 9.497894034669807e-06, |
| "epoch": 3.46980504041845, |
| "step": 3650 |
| }, |
| { |
| "loss": 0.973, |
| "grad_norm": 0.5336728096008301, |
| "learning_rate": 9.331426828685918e-06, |
| "epoch": 3.474560152163576, |
| "step": 3655 |
| }, |
| { |
| "loss": 0.983, |
| "grad_norm": 0.561795175075531, |
| "learning_rate": 9.166359943954184e-06, |
| "epoch": 3.4793152639087017, |
| "step": 3660 |
| }, |
| { |
| "loss": 0.9808, |
| "grad_norm": 0.5827515721321106, |
| "learning_rate": 9.002695929832761e-06, |
| "epoch": 3.484070375653828, |
| "step": 3665 |
| }, |
| { |
| "loss": 0.9839, |
| "grad_norm": 0.5711756944656372, |
| "learning_rate": 8.84043731401335e-06, |
| "epoch": 3.488825487398954, |
| "step": 3670 |
| }, |
| { |
| "loss": 0.9909, |
| "grad_norm": 0.5505388379096985, |
| "learning_rate": 8.679586602482047e-06, |
| "epoch": 3.49358059914408, |
| "step": 3675 |
| }, |
| { |
| "loss": 0.9838, |
| "grad_norm": 0.5935444831848145, |
| "learning_rate": 8.520146279480778e-06, |
| "epoch": 3.498335710889206, |
| "step": 3680 |
| }, |
| { |
| "loss": 0.9871, |
| "grad_norm": 0.5624505281448364, |
| "learning_rate": 8.362118807468877e-06, |
| "epoch": 3.503090822634332, |
| "step": 3685 |
| }, |
| { |
| "loss": 0.9781, |
| "grad_norm": 0.5504452586174011, |
| "learning_rate": 8.205506627085024e-06, |
| "epoch": 3.507845934379458, |
| "step": 3690 |
| }, |
| { |
| "loss": 1.003, |
| "grad_norm": 0.5809118151664734, |
| "learning_rate": 8.050312157109585e-06, |
| "epoch": 3.512601046124584, |
| "step": 3695 |
| }, |
| { |
| "loss": 0.9493, |
| "grad_norm": 0.5631592273712158, |
| "learning_rate": 7.89653779442725e-06, |
| "epoch": 3.5173561578697097, |
| "step": 3700 |
| }, |
| { |
| "loss": 0.9752, |
| "grad_norm": 0.5551528930664062, |
| "learning_rate": 7.744185913990009e-06, |
| "epoch": 3.522111269614836, |
| "step": 3705 |
| }, |
| { |
| "loss": 0.9609, |
| "grad_norm": 0.5691944360733032, |
| "learning_rate": 7.593258868780462e-06, |
| "epoch": 3.526866381359962, |
| "step": 3710 |
| }, |
| { |
| "loss": 0.965, |
| "grad_norm": 0.5610807538032532, |
| "learning_rate": 7.443758989775496e-06, |
| "epoch": 3.5316214931050878, |
| "step": 3715 |
| }, |
| { |
| "loss": 0.9667, |
| "grad_norm": 0.5502846240997314, |
| "learning_rate": 7.2956885859102986e-06, |
| "epoch": 3.536376604850214, |
| "step": 3720 |
| }, |
| { |
| "loss": 0.9646, |
| "grad_norm": 0.544586718082428, |
| "learning_rate": 7.149049944042652e-06, |
| "epoch": 3.54113171659534, |
| "step": 3725 |
| }, |
| { |
| "loss": 1.0041, |
| "grad_norm": 0.5851811170578003, |
| "learning_rate": 7.0038453289176576e-06, |
| "epoch": 3.545886828340466, |
| "step": 3730 |
| }, |
| { |
| "loss": 0.9639, |
| "grad_norm": 0.56463623046875, |
| "learning_rate": 6.860076983132757e-06, |
| "epoch": 3.550641940085592, |
| "step": 3735 |
| }, |
| { |
| "loss": 0.9755, |
| "grad_norm": 0.5568621158599854, |
| "learning_rate": 6.717747127103036e-06, |
| "epoch": 3.555397051830718, |
| "step": 3740 |
| }, |
| { |
| "loss": 0.9616, |
| "grad_norm": 0.5334104299545288, |
| "learning_rate": 6.576857959027027e-06, |
| "epoch": 3.560152163575844, |
| "step": 3745 |
| }, |
| { |
| "loss": 0.9813, |
| "grad_norm": 0.5757048726081848, |
| "learning_rate": 6.437411654852665e-06, |
| "epoch": 3.5649072753209703, |
| "step": 3750 |
| }, |
| { |
| "loss": 0.9856, |
| "grad_norm": 0.5900883674621582, |
| "learning_rate": 6.299410368243741e-06, |
| "epoch": 3.569662387066096, |
| "step": 3755 |
| }, |
| { |
| "loss": 0.9622, |
| "grad_norm": 0.5632519721984863, |
| "learning_rate": 6.162856230546632e-06, |
| "epoch": 3.574417498811222, |
| "step": 3760 |
| }, |
| { |
| "loss": 0.9747, |
| "grad_norm": 0.5620511770248413, |
| "learning_rate": 6.027751350757371e-06, |
| "epoch": 3.579172610556348, |
| "step": 3765 |
| }, |
| { |
| "loss": 0.9822, |
| "grad_norm": 0.5545108914375305, |
| "learning_rate": 5.8940978154890856e-06, |
| "epoch": 3.583927722301474, |
| "step": 3770 |
| }, |
| { |
| "loss": 0.9699, |
| "grad_norm": 0.5362772345542908, |
| "learning_rate": 5.761897688939743e-06, |
| "epoch": 3.5886828340466, |
| "step": 3775 |
| }, |
| { |
| "loss": 0.988, |
| "grad_norm": 0.5749474167823792, |
| "learning_rate": 5.631153012860346e-06, |
| "epoch": 3.593437945791726, |
| "step": 3780 |
| }, |
| { |
| "loss": 0.9714, |
| "grad_norm": 0.5488414168357849, |
| "learning_rate": 5.501865806523265e-06, |
| "epoch": 3.598193057536852, |
| "step": 3785 |
| }, |
| { |
| "loss": 0.9935, |
| "grad_norm": 0.5571540594100952, |
| "learning_rate": 5.374038066691178e-06, |
| "epoch": 3.6029481692819783, |
| "step": 3790 |
| }, |
| { |
| "loss": 0.9627, |
| "grad_norm": 0.5480414032936096, |
| "learning_rate": 5.247671767586204e-06, |
| "epoch": 3.607703281027104, |
| "step": 3795 |
| }, |
| { |
| "loss": 0.9785, |
| "grad_norm": 0.5548726320266724, |
| "learning_rate": 5.122768860859362e-06, |
| "epoch": 3.61245839277223, |
| "step": 3800 |
| }, |
| { |
| "loss": 0.9763, |
| "grad_norm": 0.5957520008087158, |
| "learning_rate": 4.9993312755604705e-06, |
| "epoch": 3.6172135045173563, |
| "step": 3805 |
| }, |
| { |
| "loss": 0.9709, |
| "grad_norm": 0.5612404942512512, |
| "learning_rate": 4.877360918108342e-06, |
| "epoch": 3.6219686162624822, |
| "step": 3810 |
| }, |
| { |
| "loss": 0.961, |
| "grad_norm": 0.5460146069526672, |
| "learning_rate": 4.756859672261382e-06, |
| "epoch": 3.626723728007608, |
| "step": 3815 |
| }, |
| { |
| "loss": 0.9846, |
| "grad_norm": 0.5655248165130615, |
| "learning_rate": 4.637829399088367e-06, |
| "epoch": 3.6314788397527344, |
| "step": 3820 |
| }, |
| { |
| "loss": 0.9958, |
| "grad_norm": 0.5633090138435364, |
| "learning_rate": 4.520271936939879e-06, |
| "epoch": 3.6362339514978603, |
| "step": 3825 |
| }, |
| { |
| "loss": 0.9887, |
| "grad_norm": 0.5754178762435913, |
| "learning_rate": 4.404189101419775e-06, |
| "epoch": 3.640989063242986, |
| "step": 3830 |
| }, |
| { |
| "loss": 0.9794, |
| "grad_norm": 0.5511764883995056, |
| "learning_rate": 4.2895826853572295e-06, |
| "epoch": 3.645744174988112, |
| "step": 3835 |
| }, |
| { |
| "loss": 0.9636, |
| "grad_norm": 0.5726919770240784, |
| "learning_rate": 4.176454458778978e-06, |
| "epoch": 3.650499286733238, |
| "step": 3840 |
| }, |
| { |
| "loss": 0.9674, |
| "grad_norm": 0.5523869395256042, |
| "learning_rate": 4.064806168882041e-06, |
| "epoch": 3.6552543984783643, |
| "step": 3845 |
| }, |
| { |
| "loss": 0.9741, |
| "grad_norm": 0.55208820104599, |
| "learning_rate": 3.954639540006699e-06, |
| "epoch": 3.66000951022349, |
| "step": 3850 |
| }, |
| { |
| "loss": 0.9793, |
| "grad_norm": 0.5573598146438599, |
| "learning_rate": 3.845956273609874e-06, |
| "epoch": 3.664764621968616, |
| "step": 3855 |
| }, |
| { |
| "loss": 0.9679, |
| "grad_norm": 0.5647951364517212, |
| "learning_rate": 3.738758048238855e-06, |
| "epoch": 3.6695197337137424, |
| "step": 3860 |
| }, |
| { |
| "loss": 0.9764, |
| "grad_norm": 0.5522949695587158, |
| "learning_rate": 3.6330465195053675e-06, |
| "epoch": 3.6742748454588683, |
| "step": 3865 |
| }, |
| { |
| "loss": 0.9752, |
| "grad_norm": 0.5610141754150391, |
| "learning_rate": 3.528823320060015e-06, |
| "epoch": 3.679029957203994, |
| "step": 3870 |
| }, |
| { |
| "loss": 0.9731, |
| "grad_norm": 0.5533769726753235, |
| "learning_rate": 3.4260900595670553e-06, |
| "epoch": 3.6837850689491205, |
| "step": 3875 |
| }, |
| { |
| "loss": 0.9834, |
| "grad_norm": 0.5678328275680542, |
| "learning_rate": 3.3248483246795193e-06, |
| "epoch": 3.6885401806942464, |
| "step": 3880 |
| }, |
| { |
| "loss": 0.9871, |
| "grad_norm": 0.5952870845794678, |
| "learning_rate": 3.2250996790147647e-06, |
| "epoch": 3.6932952924393723, |
| "step": 3885 |
| }, |
| { |
| "loss": 0.9782, |
| "grad_norm": 0.5699864625930786, |
| "learning_rate": 3.1268456631302733e-06, |
| "epoch": 3.6980504041844986, |
| "step": 3890 |
| }, |
| { |
| "loss": 0.9831, |
| "grad_norm": 0.5535215139389038, |
| "learning_rate": 3.0300877944998473e-06, |
| "epoch": 3.7028055159296245, |
| "step": 3895 |
| }, |
| { |
| "loss": 0.9748, |
| "grad_norm": 0.5434964895248413, |
| "learning_rate": 2.934827567490228e-06, |
| "epoch": 3.7075606276747504, |
| "step": 3900 |
| }, |
| { |
| "loss": 0.991, |
| "grad_norm": 0.5597246885299683, |
| "learning_rate": 2.8410664533379928e-06, |
| "epoch": 3.7123157394198762, |
| "step": 3905 |
| }, |
| { |
| "loss": 0.9389, |
| "grad_norm": 0.5486416220664978, |
| "learning_rate": 2.748805900126816e-06, |
| "epoch": 3.717070851165002, |
| "step": 3910 |
| } |
| ], |
| "best_metric": null, |
| "best_global_step": null, |
| "best_model_checkpoint": null, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "is_hyper_param_search": false, |
| "trial_name": null, |
| "trial_params": null, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_training_stop": false, |
| "should_epoch_stop": false, |
| "should_save": true, |
| "should_evaluate": false, |
| "should_log": false |
| }, |
| "attributes": {} |
| } |
| } |
| } |