| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.1221065626665918, |
| "eval_steps": 500, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0022446059313711735, |
| "grad_norm": 7.085300922393799, |
| "learning_rate": 1.3462976813762156e-07, |
| "loss": 0.7522, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.004489211862742347, |
| "grad_norm": 6.747836112976074, |
| "learning_rate": 2.842183994016455e-07, |
| "loss": 0.7426, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.006733817794113521, |
| "grad_norm": 6.484082221984863, |
| "learning_rate": 4.3380703066566945e-07, |
| "loss": 0.7352, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.008978423725484694, |
| "grad_norm": 5.182391166687012, |
| "learning_rate": 5.833956619296934e-07, |
| "loss": 0.6797, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.011223029656855869, |
| "grad_norm": 3.4766058921813965, |
| "learning_rate": 7.329842931937173e-07, |
| "loss": 0.5879, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.013467635588227042, |
| "grad_norm": 2.9406728744506836, |
| "learning_rate": 8.825729244577413e-07, |
| "loss": 0.4917, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.015712241519598217, |
| "grad_norm": 2.0085201263427734, |
| "learning_rate": 1.0321615557217653e-06, |
| "loss": 0.362, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.01795684745096939, |
| "grad_norm": 1.118497610092163, |
| "learning_rate": 1.1817501869857892e-06, |
| "loss": 0.2813, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.020201453382340563, |
| "grad_norm": 0.764472484588623, |
| "learning_rate": 1.3313388182498132e-06, |
| "loss": 0.2272, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.022446059313711738, |
| "grad_norm": 0.5994025468826294, |
| "learning_rate": 1.480927449513837e-06, |
| "loss": 0.1925, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02469066524508291, |
| "grad_norm": 0.5585537552833557, |
| "learning_rate": 1.630516080777861e-06, |
| "loss": 0.1728, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.026935271176454084, |
| "grad_norm": 0.4982088804244995, |
| "learning_rate": 1.780104712041885e-06, |
| "loss": 0.1535, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.02917987710782526, |
| "grad_norm": 0.5461462736129761, |
| "learning_rate": 1.929693343305909e-06, |
| "loss": 0.142, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.031424483039196434, |
| "grad_norm": 0.4731317460536957, |
| "learning_rate": 2.0792819745699327e-06, |
| "loss": 0.1325, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.033669088970567605, |
| "grad_norm": 0.4937710165977478, |
| "learning_rate": 2.228870605833957e-06, |
| "loss": 0.1237, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03591369490193878, |
| "grad_norm": 0.4619077742099762, |
| "learning_rate": 2.378459237097981e-06, |
| "loss": 0.1134, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.038158300833309955, |
| "grad_norm": 0.4655718207359314, |
| "learning_rate": 2.5280478683620047e-06, |
| "loss": 0.1082, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.040402906764681126, |
| "grad_norm": 0.6071754097938538, |
| "learning_rate": 2.6776364996260284e-06, |
| "loss": 0.103, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0426475126960523, |
| "grad_norm": 0.44958844780921936, |
| "learning_rate": 2.8272251308900526e-06, |
| "loss": 0.1004, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.044892118627423476, |
| "grad_norm": 0.4919949471950531, |
| "learning_rate": 2.9768137621540767e-06, |
| "loss": 0.0967, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04713672455879465, |
| "grad_norm": 0.4707821011543274, |
| "learning_rate": 3.1264023934181005e-06, |
| "loss": 0.0912, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.04938133049016582, |
| "grad_norm": 0.4630533456802368, |
| "learning_rate": 3.2759910246821242e-06, |
| "loss": 0.0895, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.051625936421537, |
| "grad_norm": 0.5952326059341431, |
| "learning_rate": 3.4255796559461484e-06, |
| "loss": 0.0877, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.05387054235290817, |
| "grad_norm": 0.47812679409980774, |
| "learning_rate": 3.575168287210172e-06, |
| "loss": 0.0851, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05611514828427934, |
| "grad_norm": 0.5016306638717651, |
| "learning_rate": 3.7247569184741963e-06, |
| "loss": 0.0808, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.05835975421565052, |
| "grad_norm": 0.49919837713241577, |
| "learning_rate": 3.874345549738221e-06, |
| "loss": 0.0803, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.06060436014702169, |
| "grad_norm": 0.4146517813205719, |
| "learning_rate": 4.0239341810022446e-06, |
| "loss": 0.0777, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.06284896607839287, |
| "grad_norm": 0.4712880849838257, |
| "learning_rate": 4.173522812266268e-06, |
| "loss": 0.0788, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.06509357200976404, |
| "grad_norm": 0.45096713304519653, |
| "learning_rate": 4.323111443530292e-06, |
| "loss": 0.074, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.06733817794113521, |
| "grad_norm": 0.4806267023086548, |
| "learning_rate": 4.472700074794316e-06, |
| "loss": 0.072, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.06958278387250638, |
| "grad_norm": 0.43456050753593445, |
| "learning_rate": 4.6222887060583395e-06, |
| "loss": 0.0716, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.07182738980387755, |
| "grad_norm": 0.3963911533355713, |
| "learning_rate": 4.771877337322364e-06, |
| "loss": 0.0713, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.07407199573524872, |
| "grad_norm": 0.36991456151008606, |
| "learning_rate": 4.921465968586388e-06, |
| "loss": 0.067, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.07631660166661991, |
| "grad_norm": 0.5009816288948059, |
| "learning_rate": 5.0710545998504115e-06, |
| "loss": 0.068, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07856120759799108, |
| "grad_norm": 0.46386927366256714, |
| "learning_rate": 5.220643231114436e-06, |
| "loss": 0.0683, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.08080581352936225, |
| "grad_norm": 0.508665144443512, |
| "learning_rate": 5.37023186237846e-06, |
| "loss": 0.0672, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.08305041946073342, |
| "grad_norm": 0.41486406326293945, |
| "learning_rate": 5.5198204936424836e-06, |
| "loss": 0.0649, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0852950253921046, |
| "grad_norm": 0.4998893737792969, |
| "learning_rate": 5.669409124906507e-06, |
| "loss": 0.0658, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.08753963132347577, |
| "grad_norm": 0.5872018933296204, |
| "learning_rate": 5.818997756170531e-06, |
| "loss": 0.0626, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.08978423725484695, |
| "grad_norm": 0.5181050896644592, |
| "learning_rate": 5.968586387434555e-06, |
| "loss": 0.0623, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.09202884318621812, |
| "grad_norm": 0.4561860263347626, |
| "learning_rate": 6.1181750186985785e-06, |
| "loss": 0.0637, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0942734491175893, |
| "grad_norm": 0.4229351878166199, |
| "learning_rate": 6.267763649962604e-06, |
| "loss": 0.0626, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.09651805504896047, |
| "grad_norm": 0.43529367446899414, |
| "learning_rate": 6.417352281226628e-06, |
| "loss": 0.0623, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.09876266098033164, |
| "grad_norm": 0.44300997257232666, |
| "learning_rate": 6.566940912490651e-06, |
| "loss": 0.062, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.10100726691170281, |
| "grad_norm": 0.415102481842041, |
| "learning_rate": 6.716529543754675e-06, |
| "loss": 0.0583, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.103251872843074, |
| "grad_norm": 0.39811965823173523, |
| "learning_rate": 6.866118175018699e-06, |
| "loss": 0.0569, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.10549647877444517, |
| "grad_norm": 0.4333321750164032, |
| "learning_rate": 7.015706806282723e-06, |
| "loss": 0.0615, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.10774108470581634, |
| "grad_norm": 0.379936546087265, |
| "learning_rate": 7.165295437546746e-06, |
| "loss": 0.0552, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.10998569063718751, |
| "grad_norm": 0.3810630142688751, |
| "learning_rate": 7.314884068810772e-06, |
| "loss": 0.0564, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.11223029656855868, |
| "grad_norm": 0.38735371828079224, |
| "learning_rate": 7.4644727000747955e-06, |
| "loss": 0.0553, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.11447490249992985, |
| "grad_norm": 0.41105860471725464, |
| "learning_rate": 7.614061331338819e-06, |
| "loss": 0.0552, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.11671950843130104, |
| "grad_norm": 0.41426989436149597, |
| "learning_rate": 7.763649962602843e-06, |
| "loss": 0.0538, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.11896411436267221, |
| "grad_norm": 0.3534553647041321, |
| "learning_rate": 7.913238593866866e-06, |
| "loss": 0.0529, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.12120872029404338, |
| "grad_norm": 0.4285803437232971, |
| "learning_rate": 8.06282722513089e-06, |
| "loss": 0.0509, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.12345332622541455, |
| "grad_norm": 0.40142738819122314, |
| "learning_rate": 8.212415856394913e-06, |
| "loss": 0.0544, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.12569793215678574, |
| "grad_norm": 0.434146910905838, |
| "learning_rate": 8.36200448765894e-06, |
| "loss": 0.0514, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.1279425380881569, |
| "grad_norm": 0.421758770942688, |
| "learning_rate": 8.511593118922962e-06, |
| "loss": 0.0491, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.13018714401952808, |
| "grad_norm": 0.41949188709259033, |
| "learning_rate": 8.661181750186987e-06, |
| "loss": 0.051, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.13243174995089924, |
| "grad_norm": 0.34015727043151855, |
| "learning_rate": 8.81077038145101e-06, |
| "loss": 0.0491, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.13467635588227042, |
| "grad_norm": 0.3929171562194824, |
| "learning_rate": 8.960359012715034e-06, |
| "loss": 0.0477, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.1369209618136416, |
| "grad_norm": 0.41220733523368835, |
| "learning_rate": 9.109947643979057e-06, |
| "loss": 0.0483, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.13916556774501276, |
| "grad_norm": 0.38868510723114014, |
| "learning_rate": 9.259536275243082e-06, |
| "loss": 0.0482, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.14141017367638395, |
| "grad_norm": 0.35203954577445984, |
| "learning_rate": 9.409124906507107e-06, |
| "loss": 0.0467, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.1436547796077551, |
| "grad_norm": 0.35794001817703247, |
| "learning_rate": 9.558713537771131e-06, |
| "loss": 0.0467, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.1458993855391263, |
| "grad_norm": 0.33909112215042114, |
| "learning_rate": 9.708302169035154e-06, |
| "loss": 0.0453, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.14814399147049745, |
| "grad_norm": 0.3420808017253876, |
| "learning_rate": 9.857890800299179e-06, |
| "loss": 0.0473, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.15038859740186863, |
| "grad_norm": 0.3780367970466614, |
| "learning_rate": 1.0007479431563203e-05, |
| "loss": 0.046, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.15263320333323982, |
| "grad_norm": 0.3484839200973511, |
| "learning_rate": 1.0157068062827226e-05, |
| "loss": 0.0443, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.15487780926461098, |
| "grad_norm": 0.3256385624408722, |
| "learning_rate": 1.030665669409125e-05, |
| "loss": 0.0468, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.15712241519598216, |
| "grad_norm": 0.3509197235107422, |
| "learning_rate": 1.0456245325355273e-05, |
| "loss": 0.0467, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.15936702112735332, |
| "grad_norm": 0.3882471024990082, |
| "learning_rate": 1.0605833956619298e-05, |
| "loss": 0.0441, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.1616116270587245, |
| "grad_norm": 0.41281041502952576, |
| "learning_rate": 1.0755422587883323e-05, |
| "loss": 0.045, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.16385623299009566, |
| "grad_norm": 0.34965190291404724, |
| "learning_rate": 1.0905011219147346e-05, |
| "loss": 0.0472, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.16610083892146685, |
| "grad_norm": 0.3685629367828369, |
| "learning_rate": 1.105459985041137e-05, |
| "loss": 0.043, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.16834544485283803, |
| "grad_norm": 0.3236459791660309, |
| "learning_rate": 1.1204188481675393e-05, |
| "loss": 0.0446, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.1705900507842092, |
| "grad_norm": 0.4009409546852112, |
| "learning_rate": 1.1353777112939418e-05, |
| "loss": 0.0432, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.17283465671558038, |
| "grad_norm": 0.3602420389652252, |
| "learning_rate": 1.150336574420344e-05, |
| "loss": 0.0421, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.17507926264695153, |
| "grad_norm": 0.30885422229766846, |
| "learning_rate": 1.1652954375467465e-05, |
| "loss": 0.0428, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.17732386857832272, |
| "grad_norm": 0.3470623791217804, |
| "learning_rate": 1.1802543006731488e-05, |
| "loss": 0.0405, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.1795684745096939, |
| "grad_norm": 0.31664037704467773, |
| "learning_rate": 1.1952131637995514e-05, |
| "loss": 0.0414, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.18181308044106506, |
| "grad_norm": 0.28882402181625366, |
| "learning_rate": 1.2101720269259539e-05, |
| "loss": 0.042, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.18405768637243625, |
| "grad_norm": 0.341634064912796, |
| "learning_rate": 1.2251308900523562e-05, |
| "loss": 0.0419, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.1863022923038074, |
| "grad_norm": 0.32544925808906555, |
| "learning_rate": 1.2400897531787586e-05, |
| "loss": 0.0386, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.1885468982351786, |
| "grad_norm": 0.32963812351226807, |
| "learning_rate": 1.2550486163051609e-05, |
| "loss": 0.0433, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.19079150416654975, |
| "grad_norm": 0.34846410155296326, |
| "learning_rate": 1.2700074794315634e-05, |
| "loss": 0.0425, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.19303611009792093, |
| "grad_norm": 0.4119662642478943, |
| "learning_rate": 1.2849663425579657e-05, |
| "loss": 0.0411, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.19528071602929212, |
| "grad_norm": 0.36676207184791565, |
| "learning_rate": 1.2999252056843681e-05, |
| "loss": 0.0407, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.19752532196066327, |
| "grad_norm": 0.320718914270401, |
| "learning_rate": 1.3148840688107704e-05, |
| "loss": 0.0408, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.19976992789203446, |
| "grad_norm": 0.297377347946167, |
| "learning_rate": 1.3298429319371729e-05, |
| "loss": 0.0393, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.20201453382340562, |
| "grad_norm": 0.29334571957588196, |
| "learning_rate": 1.3448017950635752e-05, |
| "loss": 0.0404, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2042591397547768, |
| "grad_norm": 0.3002082407474518, |
| "learning_rate": 1.3597606581899776e-05, |
| "loss": 0.0381, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.206503745686148, |
| "grad_norm": 0.3855285942554474, |
| "learning_rate": 1.3747195213163799e-05, |
| "loss": 0.0395, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.20874835161751915, |
| "grad_norm": 0.37728843092918396, |
| "learning_rate": 1.3896783844427824e-05, |
| "loss": 0.0387, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.21099295754889033, |
| "grad_norm": 0.3107747733592987, |
| "learning_rate": 1.404637247569185e-05, |
| "loss": 0.0365, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.2132375634802615, |
| "grad_norm": 0.31299862265586853, |
| "learning_rate": 1.4195961106955873e-05, |
| "loss": 0.0386, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.21548216941163267, |
| "grad_norm": 0.2866477072238922, |
| "learning_rate": 1.4345549738219897e-05, |
| "loss": 0.0384, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.21772677534300383, |
| "grad_norm": 0.2896515429019928, |
| "learning_rate": 1.449513836948392e-05, |
| "loss": 0.0368, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.21997138127437502, |
| "grad_norm": 0.3208376169204712, |
| "learning_rate": 1.4644727000747945e-05, |
| "loss": 0.035, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.2222159872057462, |
| "grad_norm": 0.26684877276420593, |
| "learning_rate": 1.479431563201197e-05, |
| "loss": 0.0386, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.22446059313711736, |
| "grad_norm": 0.2770873010158539, |
| "learning_rate": 1.4943904263275992e-05, |
| "loss": 0.0359, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.22670519906848854, |
| "grad_norm": 0.33455002307891846, |
| "learning_rate": 1.5093492894540017e-05, |
| "loss": 0.0386, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.2289498049998597, |
| "grad_norm": 0.2847146689891815, |
| "learning_rate": 1.524308152580404e-05, |
| "loss": 0.036, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.2311944109312309, |
| "grad_norm": 0.3237374722957611, |
| "learning_rate": 1.5392670157068064e-05, |
| "loss": 0.0383, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.23343901686260207, |
| "grad_norm": 0.26867830753326416, |
| "learning_rate": 1.5542258788332087e-05, |
| "loss": 0.0336, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.23568362279397323, |
| "grad_norm": 0.3390122056007385, |
| "learning_rate": 1.569184741959611e-05, |
| "loss": 0.037, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.23792822872534442, |
| "grad_norm": 0.3243669867515564, |
| "learning_rate": 1.5841436050860136e-05, |
| "loss": 0.0369, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.24017283465671557, |
| "grad_norm": 0.2567446827888489, |
| "learning_rate": 1.599102468212416e-05, |
| "loss": 0.0343, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.24241744058808676, |
| "grad_norm": 0.3240516781806946, |
| "learning_rate": 1.6140613313388185e-05, |
| "loss": 0.0361, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.24466204651945792, |
| "grad_norm": 0.30721980333328247, |
| "learning_rate": 1.629020194465221e-05, |
| "loss": 0.0351, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.2469066524508291, |
| "grad_norm": 0.2947623133659363, |
| "learning_rate": 1.643979057591623e-05, |
| "loss": 0.0349, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.24915125838220029, |
| "grad_norm": 0.3040514290332794, |
| "learning_rate": 1.6589379207180258e-05, |
| "loss": 0.0347, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.25139586431357147, |
| "grad_norm": 0.2889814078807831, |
| "learning_rate": 1.673896783844428e-05, |
| "loss": 0.0336, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.25364047024494263, |
| "grad_norm": 0.27692848443984985, |
| "learning_rate": 1.6888556469708303e-05, |
| "loss": 0.0358, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.2558850761763138, |
| "grad_norm": 0.344008207321167, |
| "learning_rate": 1.7038145100972326e-05, |
| "loss": 0.0356, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.25812968210768494, |
| "grad_norm": 0.2950272858142853, |
| "learning_rate": 1.7187733732236352e-05, |
| "loss": 0.0351, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.26037428803905616, |
| "grad_norm": 0.28355881571769714, |
| "learning_rate": 1.7337322363500375e-05, |
| "loss": 0.0328, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.2626188939704273, |
| "grad_norm": 0.3065374493598938, |
| "learning_rate": 1.7486910994764398e-05, |
| "loss": 0.0354, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.26486349990179847, |
| "grad_norm": 0.249955415725708, |
| "learning_rate": 1.763649962602842e-05, |
| "loss": 0.0342, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.2671081058331697, |
| "grad_norm": 0.26293373107910156, |
| "learning_rate": 1.7786088257292447e-05, |
| "loss": 0.0334, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.26935271176454084, |
| "grad_norm": 0.27912405133247375, |
| "learning_rate": 1.793567688855647e-05, |
| "loss": 0.0341, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.271597317695912, |
| "grad_norm": 0.2818824052810669, |
| "learning_rate": 1.8085265519820493e-05, |
| "loss": 0.0334, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.2738419236272832, |
| "grad_norm": 0.3036912977695465, |
| "learning_rate": 1.823485415108452e-05, |
| "loss": 0.0321, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.27608652955865437, |
| "grad_norm": 0.24145382642745972, |
| "learning_rate": 1.8384442782348542e-05, |
| "loss": 0.0335, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.2783311354900255, |
| "grad_norm": 0.3205563724040985, |
| "learning_rate": 1.853403141361257e-05, |
| "loss": 0.0337, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.2805757414213967, |
| "grad_norm": 0.24150335788726807, |
| "learning_rate": 1.868362004487659e-05, |
| "loss": 0.0339, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.2828203473527679, |
| "grad_norm": 0.25747033953666687, |
| "learning_rate": 1.8833208676140614e-05, |
| "loss": 0.0323, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.28506495328413906, |
| "grad_norm": 0.2524860203266144, |
| "learning_rate": 1.898279730740464e-05, |
| "loss": 0.0333, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.2873095592155102, |
| "grad_norm": 0.20733146369457245, |
| "learning_rate": 1.9132385938668663e-05, |
| "loss": 0.032, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.2895541651468814, |
| "grad_norm": 0.24969425797462463, |
| "learning_rate": 1.9281974569932686e-05, |
| "loss": 0.0334, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.2917987710782526, |
| "grad_norm": 0.26867517828941345, |
| "learning_rate": 1.943156320119671e-05, |
| "loss": 0.0317, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.29404337700962374, |
| "grad_norm": 0.23903688788414001, |
| "learning_rate": 1.9581151832460736e-05, |
| "loss": 0.0324, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.2962879829409949, |
| "grad_norm": 0.24634772539138794, |
| "learning_rate": 1.973074046372476e-05, |
| "loss": 0.0315, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.2985325888723661, |
| "grad_norm": 0.24410435557365417, |
| "learning_rate": 1.988032909498878e-05, |
| "loss": 0.0307, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.30077719480373727, |
| "grad_norm": 0.24449145793914795, |
| "learning_rate": 1.9999998636276648e-05, |
| "loss": 0.0311, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.3030218007351084, |
| "grad_norm": 0.2650293707847595, |
| "learning_rate": 1.999995090599835e-05, |
| "loss": 0.0327, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.30526640666647964, |
| "grad_norm": 0.2856460511684418, |
| "learning_rate": 1.9999834989924354e-05, |
| "loss": 0.0305, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.3075110125978508, |
| "grad_norm": 0.23453539609909058, |
| "learning_rate": 1.9999650888845042e-05, |
| "loss": 0.0314, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.30975561852922195, |
| "grad_norm": 0.25706619024276733, |
| "learning_rate": 1.9999398604015737e-05, |
| "loss": 0.0305, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.3120002244605931, |
| "grad_norm": 0.26156002283096313, |
| "learning_rate": 1.9999078137156663e-05, |
| "loss": 0.031, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.3142448303919643, |
| "grad_norm": 0.3038729131221771, |
| "learning_rate": 1.9998689490452966e-05, |
| "loss": 0.0312, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3164894363233355, |
| "grad_norm": 0.393939346075058, |
| "learning_rate": 1.9998232666554674e-05, |
| "loss": 0.0361, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.31873404225470664, |
| "grad_norm": 0.3251310884952545, |
| "learning_rate": 1.999770766857669e-05, |
| "loss": 0.0338, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.32097864818607785, |
| "grad_norm": 0.2506806552410126, |
| "learning_rate": 1.999711450009878e-05, |
| "loss": 0.0311, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.323223254117449, |
| "grad_norm": 0.274758517742157, |
| "learning_rate": 1.999645316516552e-05, |
| "loss": 0.0309, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.32546786004882017, |
| "grad_norm": 0.2681165337562561, |
| "learning_rate": 1.9995723668286304e-05, |
| "loss": 0.0299, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3277124659801913, |
| "grad_norm": 0.2143799513578415, |
| "learning_rate": 1.999492601443529e-05, |
| "loss": 0.03, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.32995707191156254, |
| "grad_norm": 0.2572579085826874, |
| "learning_rate": 1.9994060209051366e-05, |
| "loss": 0.0303, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.3322016778429337, |
| "grad_norm": 0.21482343971729279, |
| "learning_rate": 1.9993126258038126e-05, |
| "loss": 0.0301, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.33444628377430485, |
| "grad_norm": 0.2568591237068176, |
| "learning_rate": 1.9992124167763823e-05, |
| "loss": 0.0315, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.33669088970567607, |
| "grad_norm": 0.2510339617729187, |
| "learning_rate": 1.9991053945061318e-05, |
| "loss": 0.0299, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.3389354956370472, |
| "grad_norm": 0.2693713903427124, |
| "learning_rate": 1.9989915597228048e-05, |
| "loss": 0.0302, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.3411801015684184, |
| "grad_norm": 0.2134774774312973, |
| "learning_rate": 1.9988709132025967e-05, |
| "loss": 0.0286, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.3434247074997896, |
| "grad_norm": 0.21737788617610931, |
| "learning_rate": 1.9987434557681492e-05, |
| "loss": 0.029, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.34566931343116075, |
| "grad_norm": 0.21042650938034058, |
| "learning_rate": 1.998609188288546e-05, |
| "loss": 0.0298, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.3479139193625319, |
| "grad_norm": 0.24526509642601013, |
| "learning_rate": 1.998468111679304e-05, |
| "loss": 0.0285, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.35015852529390307, |
| "grad_norm": 0.2452835738658905, |
| "learning_rate": 1.9983202269023706e-05, |
| "loss": 0.0285, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.3524031312252743, |
| "grad_norm": 0.21953906118869781, |
| "learning_rate": 1.9981655349661146e-05, |
| "loss": 0.0288, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.35464773715664544, |
| "grad_norm": 0.2213865965604782, |
| "learning_rate": 1.9980040369253206e-05, |
| "loss": 0.0294, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.3568923430880166, |
| "grad_norm": 0.24777103960514069, |
| "learning_rate": 1.9978357338811814e-05, |
| "loss": 0.0302, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.3591369490193878, |
| "grad_norm": 0.23945562541484833, |
| "learning_rate": 1.9976606269812904e-05, |
| "loss": 0.029, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.36138155495075897, |
| "grad_norm": 0.1871986836194992, |
| "learning_rate": 1.9974787174196332e-05, |
| "loss": 0.0279, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.3636261608821301, |
| "grad_norm": 0.22911369800567627, |
| "learning_rate": 1.9972900064365817e-05, |
| "loss": 0.0299, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.3658707668135013, |
| "grad_norm": 0.21428261697292328, |
| "learning_rate": 1.9970944953188818e-05, |
| "loss": 0.0273, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.3681153727448725, |
| "grad_norm": 0.240596204996109, |
| "learning_rate": 1.9968921853996492e-05, |
| "loss": 0.0286, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.37035997867624365, |
| "grad_norm": 0.2429644763469696, |
| "learning_rate": 1.9966830780583564e-05, |
| "loss": 0.0284, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.3726045846076148, |
| "grad_norm": 0.2194654494524002, |
| "learning_rate": 1.996467174720826e-05, |
| "loss": 0.0288, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.374849190538986, |
| "grad_norm": 0.2488565593957901, |
| "learning_rate": 1.9962444768592185e-05, |
| "loss": 0.0279, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.3770937964703572, |
| "grad_norm": 0.22848640382289886, |
| "learning_rate": 1.996014985992025e-05, |
| "loss": 0.0281, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.37933840240172834, |
| "grad_norm": 0.20066608488559723, |
| "learning_rate": 1.9957787036840548e-05, |
| "loss": 0.0289, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.3815830083330995, |
| "grad_norm": 0.22604599595069885, |
| "learning_rate": 1.9955356315464257e-05, |
| "loss": 0.0295, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.3838276142644707, |
| "grad_norm": 0.24926242232322693, |
| "learning_rate": 1.9952857712365523e-05, |
| "loss": 0.0293, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.38607222019584186, |
| "grad_norm": 0.2565000653266907, |
| "learning_rate": 1.9950291244581356e-05, |
| "loss": 0.0292, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.388316826127213, |
| "grad_norm": 0.18487876653671265, |
| "learning_rate": 1.9947656929611502e-05, |
| "loss": 0.0268, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.39056143205858423, |
| "grad_norm": 0.21695543825626373, |
| "learning_rate": 1.994495478541834e-05, |
| "loss": 0.0278, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.3928060379899554, |
| "grad_norm": 0.18348629772663116, |
| "learning_rate": 1.9942184830426744e-05, |
| "loss": 0.0274, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.39505064392132655, |
| "grad_norm": 0.235065758228302, |
| "learning_rate": 1.9939347083523967e-05, |
| "loss": 0.0284, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.39729524985269776, |
| "grad_norm": 0.2515522539615631, |
| "learning_rate": 1.9936441564059504e-05, |
| "loss": 0.0277, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.3995398557840689, |
| "grad_norm": 0.22060731053352356, |
| "learning_rate": 1.9933468291844972e-05, |
| "loss": 0.0263, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.4017844617154401, |
| "grad_norm": 0.21448932588100433, |
| "learning_rate": 1.993042728715396e-05, |
| "loss": 0.0277, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.40402906764681124, |
| "grad_norm": 0.20610381662845612, |
| "learning_rate": 1.9927318570721903e-05, |
| "loss": 0.0284, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.40627367357818245, |
| "grad_norm": 0.21657615900039673, |
| "learning_rate": 1.9924142163745935e-05, |
| "loss": 0.0263, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.4085182795095536, |
| "grad_norm": 0.20956206321716309, |
| "learning_rate": 1.992089808788475e-05, |
| "loss": 0.027, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.41076288544092476, |
| "grad_norm": 0.20613710582256317, |
| "learning_rate": 1.991758636525844e-05, |
| "loss": 0.0262, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.413007491372296, |
| "grad_norm": 0.18393391370773315, |
| "learning_rate": 1.9914207018448363e-05, |
| "loss": 0.0248, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.41525209730366713, |
| "grad_norm": 0.19099575281143188, |
| "learning_rate": 1.991076007049698e-05, |
| "loss": 0.0252, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.4174967032350383, |
| "grad_norm": 0.20824317634105682, |
| "learning_rate": 1.990724554490769e-05, |
| "loss": 0.0265, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.41974130916640945, |
| "grad_norm": 0.219533771276474, |
| "learning_rate": 1.9903663465644687e-05, |
| "loss": 0.0241, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.42198591509778066, |
| "grad_norm": 0.22310471534729004, |
| "learning_rate": 1.990001385713278e-05, |
| "loss": 0.0261, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.4242305210291518, |
| "grad_norm": 0.18399077653884888, |
| "learning_rate": 1.9896296744257243e-05, |
| "loss": 0.0243, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.426475126960523, |
| "grad_norm": 0.17510205507278442, |
| "learning_rate": 1.989251215236362e-05, |
| "loss": 0.0248, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.4287197328918942, |
| "grad_norm": 0.20756864547729492, |
| "learning_rate": 1.988866010725759e-05, |
| "loss": 0.0266, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.43096433882326535, |
| "grad_norm": 0.20950593054294586, |
| "learning_rate": 1.9884740635204755e-05, |
| "loss": 0.0265, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.4332089447546365, |
| "grad_norm": 0.17630548775196075, |
| "learning_rate": 1.9880753762930473e-05, |
| "loss": 0.0251, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.43545355068600766, |
| "grad_norm": 0.17798230051994324, |
| "learning_rate": 1.9876699517619686e-05, |
| "loss": 0.0272, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.4376981566173789, |
| "grad_norm": 0.17199009656906128, |
| "learning_rate": 1.9872577926916727e-05, |
| "loss": 0.0249, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.43994276254875003, |
| "grad_norm": 0.19935180246829987, |
| "learning_rate": 1.9868389018925126e-05, |
| "loss": 0.0262, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.4421873684801212, |
| "grad_norm": 0.2195315659046173, |
| "learning_rate": 1.9864132822207424e-05, |
| "loss": 0.026, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.4444319744114924, |
| "grad_norm": 0.2122250646352768, |
| "learning_rate": 1.9859809365784982e-05, |
| "loss": 0.0263, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.44667658034286356, |
| "grad_norm": 0.21738848090171814, |
| "learning_rate": 1.9855418679137775e-05, |
| "loss": 0.0263, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.4489211862742347, |
| "grad_norm": 0.21562571823596954, |
| "learning_rate": 1.9850960792204197e-05, |
| "loss": 0.0261, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.4511657922056059, |
| "grad_norm": 0.1967390477657318, |
| "learning_rate": 1.9846435735380855e-05, |
| "loss": 0.025, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.4534103981369771, |
| "grad_norm": 0.1860983669757843, |
| "learning_rate": 1.9841843539522353e-05, |
| "loss": 0.0234, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.45565500406834825, |
| "grad_norm": 0.20370668172836304, |
| "learning_rate": 1.983718423594111e-05, |
| "loss": 0.0246, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.4578996099997194, |
| "grad_norm": 0.20051617920398712, |
| "learning_rate": 1.9832457856407104e-05, |
| "loss": 0.0254, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.4601442159310906, |
| "grad_norm": 0.19836241006851196, |
| "learning_rate": 1.9827664433147694e-05, |
| "loss": 0.0259, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.4623888218624618, |
| "grad_norm": 0.21288608014583588, |
| "learning_rate": 1.9822803998847378e-05, |
| "loss": 0.026, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.46463342779383293, |
| "grad_norm": 0.21260212361812592, |
| "learning_rate": 1.9817876586647572e-05, |
| "loss": 0.0254, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.46687803372520414, |
| "grad_norm": 0.19190289080142975, |
| "learning_rate": 1.98128822301464e-05, |
| "loss": 0.0245, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.4691226396565753, |
| "grad_norm": 0.17896448075771332, |
| "learning_rate": 1.980782096339844e-05, |
| "loss": 0.0254, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.47136724558794646, |
| "grad_norm": 0.20132991671562195, |
| "learning_rate": 1.9802692820914514e-05, |
| "loss": 0.0245, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.4736118515193176, |
| "grad_norm": 0.18621641397476196, |
| "learning_rate": 1.9797497837661442e-05, |
| "loss": 0.0244, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.47585645745068883, |
| "grad_norm": 0.19119122624397278, |
| "learning_rate": 1.97922360490618e-05, |
| "loss": 0.0252, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.47810106338206, |
| "grad_norm": 0.21065153181552887, |
| "learning_rate": 1.9786907490993698e-05, |
| "loss": 0.0241, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.48034566931343115, |
| "grad_norm": 0.20101730525493622, |
| "learning_rate": 1.97815121997905e-05, |
| "loss": 0.025, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.48259027524480236, |
| "grad_norm": 0.19666339457035065, |
| "learning_rate": 1.9776050212240624e-05, |
| "loss": 0.0247, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.4848348811761735, |
| "grad_norm": 0.19246205687522888, |
| "learning_rate": 1.9770521565587234e-05, |
| "loss": 0.0243, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.4870794871075447, |
| "grad_norm": 0.1777629405260086, |
| "learning_rate": 1.9764926297528036e-05, |
| "loss": 0.025, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.48932409303891583, |
| "grad_norm": 0.16932068765163422, |
| "learning_rate": 1.9759264446215006e-05, |
| "loss": 0.0232, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.49156869897028704, |
| "grad_norm": 0.20206604897975922, |
| "learning_rate": 1.975353605025411e-05, |
| "loss": 0.0248, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.4938133049016582, |
| "grad_norm": 0.2303825467824936, |
| "learning_rate": 1.9747741148705063e-05, |
| "loss": 0.0247, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.49605791083302936, |
| "grad_norm": 0.1818767488002777, |
| "learning_rate": 1.974187978108106e-05, |
| "loss": 0.0242, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.49830251676440057, |
| "grad_norm": 0.1894429624080658, |
| "learning_rate": 1.9735951987348496e-05, |
| "loss": 0.0244, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.5005471226957717, |
| "grad_norm": 0.17991645634174347, |
| "learning_rate": 1.9729957807926705e-05, |
| "loss": 0.0249, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.5027917286271429, |
| "grad_norm": 0.214262455701828, |
| "learning_rate": 1.9723897283687673e-05, |
| "loss": 0.0231, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.505036334558514, |
| "grad_norm": 0.19449850916862488, |
| "learning_rate": 1.9717770455955776e-05, |
| "loss": 0.0237, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.5072809404898853, |
| "grad_norm": 0.1926821768283844, |
| "learning_rate": 1.9711577366507477e-05, |
| "loss": 0.0248, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.5095255464212565, |
| "grad_norm": 0.2037525773048401, |
| "learning_rate": 1.9705318057571056e-05, |
| "loss": 0.023, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.5117701523526276, |
| "grad_norm": 0.1556444764137268, |
| "learning_rate": 1.9698992571826322e-05, |
| "loss": 0.0234, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.5140147582839988, |
| "grad_norm": 0.20549353957176208, |
| "learning_rate": 1.9692600952404312e-05, |
| "loss": 0.0232, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.5162593642153699, |
| "grad_norm": 0.1754326969385147, |
| "learning_rate": 1.9686143242887e-05, |
| "loss": 0.0237, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.5185039701467411, |
| "grad_norm": 0.1987963318824768, |
| "learning_rate": 1.967961948730702e-05, |
| "loss": 0.0251, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.5207485760781123, |
| "grad_norm": 0.19438199698925018, |
| "learning_rate": 1.9673029730147332e-05, |
| "loss": 0.0239, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.5229931820094834, |
| "grad_norm": 0.19073012471199036, |
| "learning_rate": 1.966637401634094e-05, |
| "loss": 0.0227, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.5252377879408546, |
| "grad_norm": 0.1820707619190216, |
| "learning_rate": 1.9659652391270576e-05, |
| "loss": 0.023, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.5274823938722258, |
| "grad_norm": 0.17488014698028564, |
| "learning_rate": 1.965286490076841e-05, |
| "loss": 0.0241, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.5297269998035969, |
| "grad_norm": 0.20880946516990662, |
| "learning_rate": 1.9646011591115706e-05, |
| "loss": 0.024, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.5319716057349682, |
| "grad_norm": 0.18289141356945038, |
| "learning_rate": 1.9639092509042533e-05, |
| "loss": 0.0241, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.5342162116663394, |
| "grad_norm": 0.20128047466278076, |
| "learning_rate": 1.963210770172743e-05, |
| "loss": 0.0237, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.5364608175977105, |
| "grad_norm": 0.17308512330055237, |
| "learning_rate": 1.9625057216797106e-05, |
| "loss": 0.025, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.5387054235290817, |
| "grad_norm": 0.19591808319091797, |
| "learning_rate": 1.961794110232608e-05, |
| "loss": 0.022, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5409500294604529, |
| "grad_norm": 0.16618050634860992, |
| "learning_rate": 1.9610759406836382e-05, |
| "loss": 0.0227, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.543194635391824, |
| "grad_norm": 0.17903420329093933, |
| "learning_rate": 1.9603512179297226e-05, |
| "loss": 0.0236, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.5454392413231952, |
| "grad_norm": 0.20347997546195984, |
| "learning_rate": 1.959619946912464e-05, |
| "loss": 0.0231, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.5476838472545664, |
| "grad_norm": 0.22455480694770813, |
| "learning_rate": 1.9588821326181172e-05, |
| "loss": 0.0248, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.5499284531859375, |
| "grad_norm": 0.16458284854888916, |
| "learning_rate": 1.9581377800775518e-05, |
| "loss": 0.0224, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.5521730591173087, |
| "grad_norm": 0.1932162195444107, |
| "learning_rate": 1.95738689436622e-05, |
| "loss": 0.0235, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.5544176650486798, |
| "grad_norm": 0.1769714504480362, |
| "learning_rate": 1.9566294806041214e-05, |
| "loss": 0.0215, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.556662270980051, |
| "grad_norm": 0.20865550637245178, |
| "learning_rate": 1.9558655439557665e-05, |
| "loss": 0.0223, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.5589068769114223, |
| "grad_norm": 0.2173549085855484, |
| "learning_rate": 1.9550950896301445e-05, |
| "loss": 0.0234, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.5611514828427934, |
| "grad_norm": 0.18162918090820312, |
| "learning_rate": 1.954318122880685e-05, |
| "loss": 0.0243, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5633960887741646, |
| "grad_norm": 0.17128832638263702, |
| "learning_rate": 1.9535346490052235e-05, |
| "loss": 0.0239, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.5656406947055358, |
| "grad_norm": 0.18167993426322937, |
| "learning_rate": 1.952744673345965e-05, |
| "loss": 0.0241, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.5678853006369069, |
| "grad_norm": 0.20374241471290588, |
| "learning_rate": 1.9519482012894483e-05, |
| "loss": 0.0235, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.5701299065682781, |
| "grad_norm": 0.17413835227489471, |
| "learning_rate": 1.9511452382665077e-05, |
| "loss": 0.0226, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.5723745124996493, |
| "grad_norm": 0.20678037405014038, |
| "learning_rate": 1.9503357897522378e-05, |
| "loss": 0.0238, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.5746191184310204, |
| "grad_norm": 0.17220425605773926, |
| "learning_rate": 1.949519861265954e-05, |
| "loss": 0.0226, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.5768637243623916, |
| "grad_norm": 0.19188691675662994, |
| "learning_rate": 1.9486974583711584e-05, |
| "loss": 0.0223, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.5791083302937629, |
| "grad_norm": 0.18967117369174957, |
| "learning_rate": 1.947868586675497e-05, |
| "loss": 0.0234, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.581352936225134, |
| "grad_norm": 0.16889534890651703, |
| "learning_rate": 1.947033251830725e-05, |
| "loss": 0.0223, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.5835975421565052, |
| "grad_norm": 0.23073066771030426, |
| "learning_rate": 1.9461914595326684e-05, |
| "loss": 0.0228, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.5858421480878763, |
| "grad_norm": 0.19199195504188538, |
| "learning_rate": 1.945343215521182e-05, |
| "loss": 0.0217, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.5880867540192475, |
| "grad_norm": 0.1747967153787613, |
| "learning_rate": 1.944488525580114e-05, |
| "loss": 0.0218, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.5903313599506187, |
| "grad_norm": 0.16960597038269043, |
| "learning_rate": 1.943627395537265e-05, |
| "loss": 0.0232, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.5925759658819898, |
| "grad_norm": 1.8774281740188599, |
| "learning_rate": 1.9427598312643467e-05, |
| "loss": 0.0268, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.594820571813361, |
| "grad_norm": 0.26028814911842346, |
| "learning_rate": 1.9418858386769447e-05, |
| "loss": 0.0327, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.5970651777447322, |
| "grad_norm": 0.22514741122722626, |
| "learning_rate": 1.9410054237344757e-05, |
| "loss": 0.0288, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.5993097836761033, |
| "grad_norm": 0.21931113302707672, |
| "learning_rate": 1.9401185924401488e-05, |
| "loss": 0.0251, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.6015543896074745, |
| "grad_norm": 0.1910109966993332, |
| "learning_rate": 1.9392253508409235e-05, |
| "loss": 0.0242, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.6037989955388458, |
| "grad_norm": 0.17131438851356506, |
| "learning_rate": 1.938325705027469e-05, |
| "loss": 0.0234, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.6060436014702169, |
| "grad_norm": 0.1875181645154953, |
| "learning_rate": 1.9374196611341212e-05, |
| "loss": 0.0241, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.6082882074015881, |
| "grad_norm": 0.18994557857513428, |
| "learning_rate": 1.936507225338843e-05, |
| "loss": 0.0226, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.6105328133329593, |
| "grad_norm": 0.19114020466804504, |
| "learning_rate": 1.9355884038631812e-05, |
| "loss": 0.023, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.6127774192643304, |
| "grad_norm": 0.18819326162338257, |
| "learning_rate": 1.9346632029722243e-05, |
| "loss": 0.023, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.6150220251957016, |
| "grad_norm": 0.2011801302433014, |
| "learning_rate": 1.9337316289745586e-05, |
| "loss": 0.0233, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.6172666311270728, |
| "grad_norm": 0.176308736205101, |
| "learning_rate": 1.9327936882222267e-05, |
| "loss": 0.0227, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.6195112370584439, |
| "grad_norm": 0.1711064577102661, |
| "learning_rate": 1.931849387110684e-05, |
| "loss": 0.0238, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.6217558429898151, |
| "grad_norm": 0.1591506451368332, |
| "learning_rate": 1.930898732078754e-05, |
| "loss": 0.0218, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.6240004489211862, |
| "grad_norm": 0.17407435178756714, |
| "learning_rate": 1.929941729608586e-05, |
| "loss": 0.0235, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.6262450548525574, |
| "grad_norm": 0.16112394630908966, |
| "learning_rate": 1.9289783862256087e-05, |
| "loss": 0.0213, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.6284896607839286, |
| "grad_norm": 0.15335716307163239, |
| "learning_rate": 1.9280087084984892e-05, |
| "loss": 0.0228, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.6307342667152998, |
| "grad_norm": 0.18014346063137054, |
| "learning_rate": 1.9270327030390832e-05, |
| "loss": 0.0233, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.632978872646671, |
| "grad_norm": 0.16869892179965973, |
| "learning_rate": 1.9260503765023954e-05, |
| "loss": 0.022, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.6352234785780422, |
| "grad_norm": 0.1692594587802887, |
| "learning_rate": 1.9250617355865292e-05, |
| "loss": 0.0226, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.6374680845094133, |
| "grad_norm": 0.19598528742790222, |
| "learning_rate": 1.924066787032646e-05, |
| "loss": 0.0214, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.6397126904407845, |
| "grad_norm": 0.17125625908374786, |
| "learning_rate": 1.9230655376249134e-05, |
| "loss": 0.022, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.6419572963721557, |
| "grad_norm": 0.1635292023420334, |
| "learning_rate": 1.9220579941904645e-05, |
| "loss": 0.0217, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.6442019023035268, |
| "grad_norm": 0.18467473983764648, |
| "learning_rate": 1.9210441635993483e-05, |
| "loss": 0.0226, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.646446508234898, |
| "grad_norm": 0.17099305987358093, |
| "learning_rate": 1.9200240527644828e-05, |
| "loss": 0.021, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.6486911141662692, |
| "grad_norm": 0.15335629880428314, |
| "learning_rate": 1.918997668641609e-05, |
| "loss": 0.0207, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.6509357200976403, |
| "grad_norm": 0.15978077054023743, |
| "learning_rate": 1.9179650182292436e-05, |
| "loss": 0.0214, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.6531803260290115, |
| "grad_norm": 0.1606997549533844, |
| "learning_rate": 1.91692610856863e-05, |
| "loss": 0.021, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.6554249319603827, |
| "grad_norm": 0.16981342434883118, |
| "learning_rate": 1.915880946743691e-05, |
| "loss": 0.0218, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.6576695378917539, |
| "grad_norm": 0.17529399693012238, |
| "learning_rate": 1.91482953988098e-05, |
| "loss": 0.0217, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.6599141438231251, |
| "grad_norm": 0.1750255823135376, |
| "learning_rate": 1.9137718951496352e-05, |
| "loss": 0.0221, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.6621587497544962, |
| "grad_norm": 0.17034149169921875, |
| "learning_rate": 1.912708019761325e-05, |
| "loss": 0.021, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.6644033556858674, |
| "grad_norm": 0.1882370561361313, |
| "learning_rate": 1.9116379209702056e-05, |
| "loss": 0.0208, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.6666479616172386, |
| "grad_norm": 0.17587997019290924, |
| "learning_rate": 1.910561606072865e-05, |
| "loss": 0.0208, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.6688925675486097, |
| "grad_norm": 0.18303704261779785, |
| "learning_rate": 1.909479082408279e-05, |
| "loss": 0.0196, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.6711371734799809, |
| "grad_norm": 0.1817975491285324, |
| "learning_rate": 1.908390357357758e-05, |
| "loss": 0.0209, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.6733817794113521, |
| "grad_norm": 0.194996640086174, |
| "learning_rate": 1.9072954383448955e-05, |
| "loss": 0.0215, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.6756263853427232, |
| "grad_norm": 0.20948557555675507, |
| "learning_rate": 1.9061943328355214e-05, |
| "loss": 0.02, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.6778709912740944, |
| "grad_norm": 0.17736251652240753, |
| "learning_rate": 1.905087048337648e-05, |
| "loss": 0.0202, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.6801155972054657, |
| "grad_norm": 0.14099887013435364, |
| "learning_rate": 1.9039735924014197e-05, |
| "loss": 0.0203, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.6823602031368368, |
| "grad_norm": 0.16595132648944855, |
| "learning_rate": 1.902853972619062e-05, |
| "loss": 0.0213, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.684604809068208, |
| "grad_norm": 0.14856795966625214, |
| "learning_rate": 1.9017281966248284e-05, |
| "loss": 0.02, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.6868494149995792, |
| "grad_norm": 0.16550858318805695, |
| "learning_rate": 1.90059627209495e-05, |
| "loss": 0.0214, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.6890940209309503, |
| "grad_norm": 0.20143896341323853, |
| "learning_rate": 1.8994582067475827e-05, |
| "loss": 0.0208, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.6913386268623215, |
| "grad_norm": 0.16624027490615845, |
| "learning_rate": 1.8983140083427528e-05, |
| "loss": 0.0202, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.6935832327936926, |
| "grad_norm": 0.17451374232769012, |
| "learning_rate": 1.8971636846823074e-05, |
| "loss": 0.0215, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.6958278387250638, |
| "grad_norm": 0.18585652112960815, |
| "learning_rate": 1.8960072436098577e-05, |
| "loss": 0.0194, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.698072444656435, |
| "grad_norm": 0.1638859361410141, |
| "learning_rate": 1.894844693010728e-05, |
| "loss": 0.0203, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.7003170505878061, |
| "grad_norm": 0.18684209883213043, |
| "learning_rate": 1.8936760408119012e-05, |
| "loss": 0.0223, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.7025616565191773, |
| "grad_norm": 0.1838609278202057, |
| "learning_rate": 1.8925012949819645e-05, |
| "loss": 0.0216, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.7048062624505486, |
| "grad_norm": 0.17233972251415253, |
| "learning_rate": 1.8913204635310548e-05, |
| "loss": 0.0201, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.7070508683819197, |
| "grad_norm": 0.18251539766788483, |
| "learning_rate": 1.8901335545108054e-05, |
| "loss": 0.0207, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.7092954743132909, |
| "grad_norm": 0.1725347936153412, |
| "learning_rate": 1.8889405760142887e-05, |
| "loss": 0.0209, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.7115400802446621, |
| "grad_norm": 0.17256340384483337, |
| "learning_rate": 1.8877415361759645e-05, |
| "loss": 0.0209, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.7137846861760332, |
| "grad_norm": 0.1629970371723175, |
| "learning_rate": 1.8865364431716205e-05, |
| "loss": 0.0208, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.7160292921074044, |
| "grad_norm": 0.20335057377815247, |
| "learning_rate": 1.8853253052183206e-05, |
| "loss": 0.0206, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.7182738980387756, |
| "grad_norm": 0.15751416981220245, |
| "learning_rate": 1.8841081305743447e-05, |
| "loss": 0.0202, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.7205185039701467, |
| "grad_norm": 0.15457507967948914, |
| "learning_rate": 1.8828849275391366e-05, |
| "loss": 0.0214, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.7227631099015179, |
| "grad_norm": 0.1803683042526245, |
| "learning_rate": 1.8816557044532443e-05, |
| "loss": 0.0196, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.7250077158328891, |
| "grad_norm": 0.18161389231681824, |
| "learning_rate": 1.880420469698264e-05, |
| "loss": 0.0212, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.7272523217642602, |
| "grad_norm": 0.17406050860881805, |
| "learning_rate": 1.879179231696784e-05, |
| "loss": 0.02, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.7294969276956315, |
| "grad_norm": 0.15951761603355408, |
| "learning_rate": 1.8779319989123253e-05, |
| "loss": 0.0202, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.7317415336270026, |
| "grad_norm": 0.17296704649925232, |
| "learning_rate": 1.8766787798492863e-05, |
| "loss": 0.0195, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.7339861395583738, |
| "grad_norm": 0.16530241072177887, |
| "learning_rate": 1.8754195830528826e-05, |
| "loss": 0.0208, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.736230745489745, |
| "grad_norm": 0.16106492280960083, |
| "learning_rate": 1.8741544171090898e-05, |
| "loss": 0.0209, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.7384753514211161, |
| "grad_norm": 0.17415162920951843, |
| "learning_rate": 1.872883290644585e-05, |
| "loss": 0.021, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.7407199573524873, |
| "grad_norm": 0.16503530740737915, |
| "learning_rate": 1.8716062123266877e-05, |
| "loss": 0.0199, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.7429645632838585, |
| "grad_norm": 0.17592734098434448, |
| "learning_rate": 1.8703231908633007e-05, |
| "loss": 0.0199, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.7452091692152296, |
| "grad_norm": 0.16938206553459167, |
| "learning_rate": 1.8690342350028513e-05, |
| "loss": 0.0195, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.7474537751466008, |
| "grad_norm": 0.16405285894870758, |
| "learning_rate": 1.8677393535342298e-05, |
| "loss": 0.021, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.749698381077972, |
| "grad_norm": 0.16769342124462128, |
| "learning_rate": 1.8664385552867332e-05, |
| "loss": 0.0199, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.7519429870093431, |
| "grad_norm": 0.17460818588733673, |
| "learning_rate": 1.8651318491300002e-05, |
| "loss": 0.0206, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.7541875929407144, |
| "grad_norm": 0.1495262235403061, |
| "learning_rate": 1.863819243973955e-05, |
| "loss": 0.0194, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.7564321988720856, |
| "grad_norm": 0.15108689665794373, |
| "learning_rate": 1.862500748768744e-05, |
| "loss": 0.0198, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.7586768048034567, |
| "grad_norm": 0.15738345682621002, |
| "learning_rate": 1.8611763725046758e-05, |
| "loss": 0.0191, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.7609214107348279, |
| "grad_norm": 0.14987443387508392, |
| "learning_rate": 1.85984612421216e-05, |
| "loss": 0.0186, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.763166016666199, |
| "grad_norm": 0.1464933604001999, |
| "learning_rate": 1.858510012961645e-05, |
| "loss": 0.0209, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.7654106225975702, |
| "grad_norm": 0.14822939038276672, |
| "learning_rate": 1.857168047863556e-05, |
| "loss": 0.02, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.7676552285289414, |
| "grad_norm": 0.1794120967388153, |
| "learning_rate": 1.8558202380682343e-05, |
| "loss": 0.0211, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.7698998344603125, |
| "grad_norm": 0.15938477218151093, |
| "learning_rate": 1.8544665927658723e-05, |
| "loss": 0.0197, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.7721444403916837, |
| "grad_norm": 0.1710355430841446, |
| "learning_rate": 1.853107121186455e-05, |
| "loss": 0.0196, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.7743890463230549, |
| "grad_norm": 0.15917006134986877, |
| "learning_rate": 1.8517418325996916e-05, |
| "loss": 0.0197, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.776633652254426, |
| "grad_norm": 0.15458229184150696, |
| "learning_rate": 1.8503707363149573e-05, |
| "loss": 0.0193, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.7788782581857973, |
| "grad_norm": 0.1885480433702469, |
| "learning_rate": 1.8489938416812273e-05, |
| "loss": 0.0194, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.7811228641171685, |
| "grad_norm": 0.1443496197462082, |
| "learning_rate": 1.847611158087013e-05, |
| "loss": 0.0196, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.7833674700485396, |
| "grad_norm": 0.1544029414653778, |
| "learning_rate": 1.846222694960299e-05, |
| "loss": 0.0198, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.7856120759799108, |
| "grad_norm": 0.17099037766456604, |
| "learning_rate": 1.8448284617684784e-05, |
| "loss": 0.0193, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.787856681911282, |
| "grad_norm": 0.1704227477312088, |
| "learning_rate": 1.843428468018287e-05, |
| "loss": 0.019, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.7901012878426531, |
| "grad_norm": 0.1799788922071457, |
| "learning_rate": 1.842022723255741e-05, |
| "loss": 0.0199, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.7923458937740243, |
| "grad_norm": 0.16611269116401672, |
| "learning_rate": 1.8406112370660695e-05, |
| "loss": 0.0206, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.7945904997053955, |
| "grad_norm": 0.16963708400726318, |
| "learning_rate": 1.8391940190736514e-05, |
| "loss": 0.0186, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.7968351056367666, |
| "grad_norm": 0.16803216934204102, |
| "learning_rate": 1.8377710789419473e-05, |
| "loss": 0.0185, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.7990797115681378, |
| "grad_norm": 0.15741732716560364, |
| "learning_rate": 1.8363424263734354e-05, |
| "loss": 0.0201, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.8013243174995089, |
| "grad_norm": 0.1796884834766388, |
| "learning_rate": 1.834908071109545e-05, |
| "loss": 0.019, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.8035689234308802, |
| "grad_norm": 0.15816806256771088, |
| "learning_rate": 1.8334680229305894e-05, |
| "loss": 0.0193, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.8058135293622514, |
| "grad_norm": 0.15980146825313568, |
| "learning_rate": 1.8320222916556996e-05, |
| "loss": 0.0198, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.8080581352936225, |
| "grad_norm": 0.14864562451839447, |
| "learning_rate": 1.8305708871427583e-05, |
| "loss": 0.0188, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.8103027412249937, |
| "grad_norm": 0.145126610994339, |
| "learning_rate": 1.829113819288331e-05, |
| "loss": 0.0187, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.8125473471563649, |
| "grad_norm": 0.14894086122512817, |
| "learning_rate": 1.827651098027599e-05, |
| "loss": 0.0198, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.814791953087736, |
| "grad_norm": 0.1573464423418045, |
| "learning_rate": 1.8261827333342938e-05, |
| "loss": 0.0192, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.8170365590191072, |
| "grad_norm": 0.16083981096744537, |
| "learning_rate": 1.824708735220625e-05, |
| "loss": 0.0194, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.8192811649504784, |
| "grad_norm": 0.15828320384025574, |
| "learning_rate": 1.823229113737216e-05, |
| "loss": 0.0189, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.8215257708818495, |
| "grad_norm": 0.13140316307544708, |
| "learning_rate": 1.8217438789730325e-05, |
| "loss": 0.0194, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.8237703768132207, |
| "grad_norm": 0.16682444512844086, |
| "learning_rate": 1.8202530410553162e-05, |
| "loss": 0.0199, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.826014982744592, |
| "grad_norm": 0.15172256529331207, |
| "learning_rate": 1.818756610149514e-05, |
| "loss": 0.0189, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.828259588675963, |
| "grad_norm": 0.1695002317428589, |
| "learning_rate": 1.8172545964592087e-05, |
| "loss": 0.0197, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.8305041946073343, |
| "grad_norm": 0.16010014712810516, |
| "learning_rate": 1.815747010226051e-05, |
| "loss": 0.0182, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.8327488005387054, |
| "grad_norm": 0.15437164902687073, |
| "learning_rate": 1.8142338617296878e-05, |
| "loss": 0.0198, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.8349934064700766, |
| "grad_norm": 0.17187710106372833, |
| "learning_rate": 1.812715161287693e-05, |
| "loss": 0.0195, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.8372380124014478, |
| "grad_norm": 0.16505780816078186, |
| "learning_rate": 1.8111909192554976e-05, |
| "loss": 0.0177, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.8394826183328189, |
| "grad_norm": 0.161300927400589, |
| "learning_rate": 1.8096611460263178e-05, |
| "loss": 0.0189, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.8417272242641901, |
| "grad_norm": 0.16342850029468536, |
| "learning_rate": 1.8081258520310856e-05, |
| "loss": 0.0183, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.8439718301955613, |
| "grad_norm": 0.15704095363616943, |
| "learning_rate": 1.8065850477383767e-05, |
| "loss": 0.0183, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.8462164361269324, |
| "grad_norm": 0.1607922911643982, |
| "learning_rate": 1.805038743654339e-05, |
| "loss": 0.019, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.8484610420583036, |
| "grad_norm": 0.16337986290454865, |
| "learning_rate": 1.803486950322622e-05, |
| "loss": 0.0193, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.8507056479896749, |
| "grad_norm": 0.19210092723369598, |
| "learning_rate": 1.801929678324304e-05, |
| "loss": 0.0179, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.852950253921046, |
| "grad_norm": 0.1809729039669037, |
| "learning_rate": 1.80036693827782e-05, |
| "loss": 0.0193, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.8551948598524172, |
| "grad_norm": 0.14291028678417206, |
| "learning_rate": 1.79879874083889e-05, |
| "loss": 0.0189, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.8574394657837884, |
| "grad_norm": 0.2091740220785141, |
| "learning_rate": 1.7972250967004448e-05, |
| "loss": 0.0182, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.8596840717151595, |
| "grad_norm": 0.15339423716068268, |
| "learning_rate": 1.7956460165925556e-05, |
| "loss": 0.0207, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.8619286776465307, |
| "grad_norm": 0.1679808646440506, |
| "learning_rate": 1.794061511282358e-05, |
| "loss": 0.0178, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.8641732835779019, |
| "grad_norm": 0.1681501418352127, |
| "learning_rate": 1.792471591573981e-05, |
| "loss": 0.0182, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.866417889509273, |
| "grad_norm": 0.14759108424186707, |
| "learning_rate": 1.790876268308472e-05, |
| "loss": 0.0184, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.8686624954406442, |
| "grad_norm": 0.15728703141212463, |
| "learning_rate": 1.7892755523637224e-05, |
| "loss": 0.0178, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.8709071013720153, |
| "grad_norm": 0.14030325412750244, |
| "learning_rate": 1.7876694546543955e-05, |
| "loss": 0.0189, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.8731517073033865, |
| "grad_norm": 0.13951782882213593, |
| "learning_rate": 1.7860579861318496e-05, |
| "loss": 0.0191, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.8753963132347578, |
| "grad_norm": 0.16399997472763062, |
| "learning_rate": 1.7844411577840654e-05, |
| "loss": 0.0188, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.8776409191661289, |
| "grad_norm": 0.14510847628116608, |
| "learning_rate": 1.7828189806355694e-05, |
| "loss": 0.0176, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.8798855250975001, |
| "grad_norm": 0.1528862714767456, |
| "learning_rate": 1.78119146574736e-05, |
| "loss": 0.0198, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.8821301310288713, |
| "grad_norm": 0.2043169140815735, |
| "learning_rate": 1.7795586242168313e-05, |
| "loss": 0.0195, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.8843747369602424, |
| "grad_norm": 0.15285201370716095, |
| "learning_rate": 1.7779204671776984e-05, |
| "loss": 0.0184, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.8866193428916136, |
| "grad_norm": 0.1563611477613449, |
| "learning_rate": 1.7762770057999195e-05, |
| "loss": 0.0187, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.8888639488229848, |
| "grad_norm": 0.15826715528964996, |
| "learning_rate": 1.7746282512896224e-05, |
| "loss": 0.0179, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.8911085547543559, |
| "grad_norm": 0.14216351509094238, |
| "learning_rate": 1.7729742148890258e-05, |
| "loss": 0.0185, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.8933531606857271, |
| "grad_norm": 0.17049328982830048, |
| "learning_rate": 1.7713149078763644e-05, |
| "loss": 0.0183, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.8955977666170983, |
| "grad_norm": 0.14374832808971405, |
| "learning_rate": 1.7696503415658096e-05, |
| "loss": 0.0192, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.8978423725484694, |
| "grad_norm": 0.18668319284915924, |
| "learning_rate": 1.767980527307396e-05, |
| "loss": 0.0198, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.9000869784798406, |
| "grad_norm": 0.17893734574317932, |
| "learning_rate": 1.76630547648694e-05, |
| "loss": 0.0198, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.9023315844112118, |
| "grad_norm": 0.1579989641904831, |
| "learning_rate": 1.7646252005259657e-05, |
| "loss": 0.0182, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.904576190342583, |
| "grad_norm": 0.17703531682491302, |
| "learning_rate": 1.7629397108816242e-05, |
| "loss": 0.0177, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.9068207962739542, |
| "grad_norm": 0.14626428484916687, |
| "learning_rate": 1.7612490190466177e-05, |
| "loss": 0.0195, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.9090654022053253, |
| "grad_norm": 0.15867730975151062, |
| "learning_rate": 1.7595531365491192e-05, |
| "loss": 0.0174, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.9113100081366965, |
| "grad_norm": 0.14312785863876343, |
| "learning_rate": 1.7578520749526952e-05, |
| "loss": 0.0178, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.9135546140680677, |
| "grad_norm": 0.15664878487586975, |
| "learning_rate": 1.7561458458562265e-05, |
| "loss": 0.0186, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.9157992199994388, |
| "grad_norm": 0.1552259773015976, |
| "learning_rate": 1.7544344608938296e-05, |
| "loss": 0.0184, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.91804382593081, |
| "grad_norm": 0.1600690335035324, |
| "learning_rate": 1.7527179317347754e-05, |
| "loss": 0.0187, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.9202884318621812, |
| "grad_norm": 0.17803123593330383, |
| "learning_rate": 1.7509962700834128e-05, |
| "loss": 0.0175, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.9225330377935523, |
| "grad_norm": 0.17079535126686096, |
| "learning_rate": 1.749269487679086e-05, |
| "loss": 0.0182, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.9247776437249235, |
| "grad_norm": 0.14618434011936188, |
| "learning_rate": 1.7475375962960564e-05, |
| "loss": 0.018, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.9270222496562948, |
| "grad_norm": 0.140808567404747, |
| "learning_rate": 1.7458006077434208e-05, |
| "loss": 0.0183, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.9292668555876659, |
| "grad_norm": 0.15096674859523773, |
| "learning_rate": 1.744058533865032e-05, |
| "loss": 0.0177, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.9315114615190371, |
| "grad_norm": 0.16479936242103577, |
| "learning_rate": 1.742311386539418e-05, |
| "loss": 0.0182, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.9337560674504083, |
| "grad_norm": 0.14364103972911835, |
| "learning_rate": 1.7405591776797e-05, |
| "loss": 0.0171, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.9360006733817794, |
| "grad_norm": 0.16919943690299988, |
| "learning_rate": 1.7388019192335123e-05, |
| "loss": 0.0184, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.9382452793131506, |
| "grad_norm": 0.1467256247997284, |
| "learning_rate": 1.7370396231829202e-05, |
| "loss": 0.0173, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.9404898852445217, |
| "grad_norm": 0.1647726446390152, |
| "learning_rate": 1.735272301544339e-05, |
| "loss": 0.0178, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.9427344911758929, |
| "grad_norm": 0.16072970628738403, |
| "learning_rate": 1.7334999663684504e-05, |
| "loss": 0.0173, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.9449790971072641, |
| "grad_norm": 0.15171808004379272, |
| "learning_rate": 1.731722629740123e-05, |
| "loss": 0.0167, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.9472237030386352, |
| "grad_norm": 0.13393864035606384, |
| "learning_rate": 1.7299403037783264e-05, |
| "loss": 0.0178, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.9494683089700064, |
| "grad_norm": 0.18740731477737427, |
| "learning_rate": 1.7281530006360525e-05, |
| "loss": 0.0184, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.9517129149013777, |
| "grad_norm": 0.13795392215251923, |
| "learning_rate": 1.7263607325002298e-05, |
| "loss": 0.018, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.9539575208327488, |
| "grad_norm": 0.16634628176689148, |
| "learning_rate": 1.7245635115916403e-05, |
| "loss": 0.0186, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.95620212676412, |
| "grad_norm": 0.15677855908870697, |
| "learning_rate": 1.7227613501648388e-05, |
| "loss": 0.0179, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.9584467326954912, |
| "grad_norm": 0.13720932602882385, |
| "learning_rate": 1.720954260508067e-05, |
| "loss": 0.0185, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.9606913386268623, |
| "grad_norm": 0.15041740238666534, |
| "learning_rate": 1.7191422549431692e-05, |
| "loss": 0.0176, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.9629359445582335, |
| "grad_norm": 0.1521487534046173, |
| "learning_rate": 1.7173253458255103e-05, |
| "loss": 0.0179, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.9651805504896047, |
| "grad_norm": 0.15372952818870544, |
| "learning_rate": 1.715503545543891e-05, |
| "loss": 0.0177, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.9674251564209758, |
| "grad_norm": 0.16285191476345062, |
| "learning_rate": 1.713676866520462e-05, |
| "loss": 0.0179, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.969669762352347, |
| "grad_norm": 0.13474644720554352, |
| "learning_rate": 1.7118453212106408e-05, |
| "loss": 0.0175, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.9719143682837182, |
| "grad_norm": 0.15485605597496033, |
| "learning_rate": 1.710008922103027e-05, |
| "loss": 0.0172, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.9741589742150893, |
| "grad_norm": 0.18369507789611816, |
| "learning_rate": 1.7081676817193136e-05, |
| "loss": 0.0177, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.9764035801464606, |
| "grad_norm": 0.15704116225242615, |
| "learning_rate": 1.7063216126142078e-05, |
| "loss": 0.0181, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.9786481860778317, |
| "grad_norm": 0.14740833640098572, |
| "learning_rate": 1.7044707273753407e-05, |
| "loss": 0.0163, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.9808927920092029, |
| "grad_norm": 0.13891027867794037, |
| "learning_rate": 1.7026150386231814e-05, |
| "loss": 0.0178, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.9831373979405741, |
| "grad_norm": 0.15418022871017456, |
| "learning_rate": 1.700754559010954e-05, |
| "loss": 0.0183, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.9853820038719452, |
| "grad_norm": 0.14405600726604462, |
| "learning_rate": 1.6988893012245494e-05, |
| "loss": 0.0167, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.9876266098033164, |
| "grad_norm": 0.18409493565559387, |
| "learning_rate": 1.6970192779824383e-05, |
| "loss": 0.019, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.9898712157346876, |
| "grad_norm": 0.15557466447353363, |
| "learning_rate": 1.6951445020355853e-05, |
| "loss": 0.0171, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.9921158216660587, |
| "grad_norm": 0.1519443243741989, |
| "learning_rate": 1.6932649861673626e-05, |
| "loss": 0.0162, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.9943604275974299, |
| "grad_norm": 0.14214234054088593, |
| "learning_rate": 1.6913807431934612e-05, |
| "loss": 0.0179, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.9966050335288011, |
| "grad_norm": 0.14018166065216064, |
| "learning_rate": 1.6894917859618054e-05, |
| "loss": 0.0174, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.9988496394601722, |
| "grad_norm": 0.15058715641498566, |
| "learning_rate": 1.6875981273524632e-05, |
| "loss": 0.0174, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.0008978423725485, |
| "grad_norm": 0.17037492990493774, |
| "learning_rate": 1.68569978027756e-05, |
| "loss": 0.017, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.0031424483039197, |
| "grad_norm": 0.1476113498210907, |
| "learning_rate": 1.68379675768119e-05, |
| "loss": 0.0172, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.0053870542352907, |
| "grad_norm": 0.13747438788414001, |
| "learning_rate": 1.6818890725393275e-05, |
| "loss": 0.0157, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.007631660166662, |
| "grad_norm": 0.18724720180034637, |
| "learning_rate": 1.67997673785974e-05, |
| "loss": 0.0172, |
| "step": 4490 |
| }, |
| { |
| "epoch": 1.0098762660980332, |
| "grad_norm": 0.16652311384677887, |
| "learning_rate": 1.678059766681897e-05, |
| "loss": 0.0166, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.0121208720294044, |
| "grad_norm": 0.14732131361961365, |
| "learning_rate": 1.6761381720768836e-05, |
| "loss": 0.0167, |
| "step": 4510 |
| }, |
| { |
| "epoch": 1.0143654779607756, |
| "grad_norm": 0.1441793441772461, |
| "learning_rate": 1.6742119671473096e-05, |
| "loss": 0.0165, |
| "step": 4520 |
| }, |
| { |
| "epoch": 1.0166100838921466, |
| "grad_norm": 0.14003914594650269, |
| "learning_rate": 1.6722811650272213e-05, |
| "loss": 0.0178, |
| "step": 4530 |
| }, |
| { |
| "epoch": 1.0188546898235178, |
| "grad_norm": 0.1431935429573059, |
| "learning_rate": 1.6703457788820113e-05, |
| "loss": 0.0173, |
| "step": 4540 |
| }, |
| { |
| "epoch": 1.021099295754889, |
| "grad_norm": 0.13941150903701782, |
| "learning_rate": 1.6684058219083283e-05, |
| "loss": 0.0161, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.0233439016862602, |
| "grad_norm": 0.1487084925174713, |
| "learning_rate": 1.6664613073339885e-05, |
| "loss": 0.0159, |
| "step": 4560 |
| }, |
| { |
| "epoch": 1.0255885076176314, |
| "grad_norm": 0.1264648735523224, |
| "learning_rate": 1.6645122484178847e-05, |
| "loss": 0.0163, |
| "step": 4570 |
| }, |
| { |
| "epoch": 1.0278331135490026, |
| "grad_norm": 0.1608554720878601, |
| "learning_rate": 1.662558658449895e-05, |
| "loss": 0.0159, |
| "step": 4580 |
| }, |
| { |
| "epoch": 1.0300777194803736, |
| "grad_norm": 0.15731705725193024, |
| "learning_rate": 1.6606005507507936e-05, |
| "loss": 0.0171, |
| "step": 4590 |
| }, |
| { |
| "epoch": 1.0323223254117448, |
| "grad_norm": 0.1623576283454895, |
| "learning_rate": 1.6586379386721593e-05, |
| "loss": 0.0167, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.034566931343116, |
| "grad_norm": 0.17850559949874878, |
| "learning_rate": 1.6566708355962845e-05, |
| "loss": 0.0167, |
| "step": 4610 |
| }, |
| { |
| "epoch": 1.0368115372744873, |
| "grad_norm": 0.13232485949993134, |
| "learning_rate": 1.654699254936083e-05, |
| "loss": 0.018, |
| "step": 4620 |
| }, |
| { |
| "epoch": 1.0390561432058585, |
| "grad_norm": 0.13724569976329803, |
| "learning_rate": 1.6527232101350017e-05, |
| "loss": 0.0159, |
| "step": 4630 |
| }, |
| { |
| "epoch": 1.0413007491372297, |
| "grad_norm": 0.14290913939476013, |
| "learning_rate": 1.6507427146669248e-05, |
| "loss": 0.0169, |
| "step": 4640 |
| }, |
| { |
| "epoch": 1.0435453550686007, |
| "grad_norm": 0.1413612961769104, |
| "learning_rate": 1.6487577820360844e-05, |
| "loss": 0.0168, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.045789960999972, |
| "grad_norm": 0.15013279020786285, |
| "learning_rate": 1.6467684257769684e-05, |
| "loss": 0.0163, |
| "step": 4660 |
| }, |
| { |
| "epoch": 1.0480345669313431, |
| "grad_norm": 0.142470121383667, |
| "learning_rate": 1.6447746594542273e-05, |
| "loss": 0.0166, |
| "step": 4670 |
| }, |
| { |
| "epoch": 1.0502791728627143, |
| "grad_norm": 0.15328320860862732, |
| "learning_rate": 1.6427764966625817e-05, |
| "loss": 0.0161, |
| "step": 4680 |
| }, |
| { |
| "epoch": 1.0525237787940855, |
| "grad_norm": 0.13435743749141693, |
| "learning_rate": 1.640773951026731e-05, |
| "loss": 0.0154, |
| "step": 4690 |
| }, |
| { |
| "epoch": 1.0547683847254565, |
| "grad_norm": 0.12938353419303894, |
| "learning_rate": 1.638767036201259e-05, |
| "loss": 0.016, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.0570129906568277, |
| "grad_norm": 0.13953883945941925, |
| "learning_rate": 1.636755765870541e-05, |
| "loss": 0.017, |
| "step": 4710 |
| }, |
| { |
| "epoch": 1.059257596588199, |
| "grad_norm": 0.14840665459632874, |
| "learning_rate": 1.634740153748651e-05, |
| "loss": 0.0156, |
| "step": 4720 |
| }, |
| { |
| "epoch": 1.0615022025195702, |
| "grad_norm": 0.14732645452022552, |
| "learning_rate": 1.6327202135792687e-05, |
| "loss": 0.0153, |
| "step": 4730 |
| }, |
| { |
| "epoch": 1.0637468084509414, |
| "grad_norm": 0.15273860096931458, |
| "learning_rate": 1.6306959591355838e-05, |
| "loss": 0.0162, |
| "step": 4740 |
| }, |
| { |
| "epoch": 1.0659914143823126, |
| "grad_norm": 0.1429833620786667, |
| "learning_rate": 1.6286674042202044e-05, |
| "loss": 0.0147, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.0682360203136836, |
| "grad_norm": 0.14377611875534058, |
| "learning_rate": 1.6266345626650618e-05, |
| "loss": 0.0153, |
| "step": 4760 |
| }, |
| { |
| "epoch": 1.0704806262450548, |
| "grad_norm": 0.1336628645658493, |
| "learning_rate": 1.6245974483313153e-05, |
| "loss": 0.0152, |
| "step": 4770 |
| }, |
| { |
| "epoch": 1.072725232176426, |
| "grad_norm": 0.1435423344373703, |
| "learning_rate": 1.6225560751092598e-05, |
| "loss": 0.0163, |
| "step": 4780 |
| }, |
| { |
| "epoch": 1.0749698381077972, |
| "grad_norm": 0.14871567487716675, |
| "learning_rate": 1.6205104569182294e-05, |
| "loss": 0.0161, |
| "step": 4790 |
| }, |
| { |
| "epoch": 1.0772144440391684, |
| "grad_norm": 0.17508377134799957, |
| "learning_rate": 1.6184606077065023e-05, |
| "loss": 0.0154, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.0794590499705397, |
| "grad_norm": 0.18037168681621552, |
| "learning_rate": 1.616406541451208e-05, |
| "loss": 0.0162, |
| "step": 4810 |
| }, |
| { |
| "epoch": 1.0817036559019106, |
| "grad_norm": 0.12987300753593445, |
| "learning_rate": 1.6143482721582285e-05, |
| "loss": 0.0156, |
| "step": 4820 |
| }, |
| { |
| "epoch": 1.0839482618332819, |
| "grad_norm": 0.135226309299469, |
| "learning_rate": 1.6122858138621068e-05, |
| "loss": 0.0154, |
| "step": 4830 |
| }, |
| { |
| "epoch": 1.086192867764653, |
| "grad_norm": 0.1384623944759369, |
| "learning_rate": 1.610219180625947e-05, |
| "loss": 0.0162, |
| "step": 4840 |
| }, |
| { |
| "epoch": 1.0884374736960243, |
| "grad_norm": 0.2058112919330597, |
| "learning_rate": 1.6081483865413215e-05, |
| "loss": 0.0162, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.0906820796273955, |
| "grad_norm": 0.14403843879699707, |
| "learning_rate": 1.6060734457281745e-05, |
| "loss": 0.015, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.0929266855587665, |
| "grad_norm": 0.11267949640750885, |
| "learning_rate": 1.603994372334724e-05, |
| "loss": 0.0147, |
| "step": 4870 |
| }, |
| { |
| "epoch": 1.0951712914901377, |
| "grad_norm": 0.1622733771800995, |
| "learning_rate": 1.6019111805373672e-05, |
| "loss": 0.0156, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.097415897421509, |
| "grad_norm": 0.16310657560825348, |
| "learning_rate": 1.5998238845405823e-05, |
| "loss": 0.0157, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.0996605033528801, |
| "grad_norm": 0.11895309388637543, |
| "learning_rate": 1.5977324985768344e-05, |
| "loss": 0.0159, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.1019051092842513, |
| "grad_norm": 0.12787587940692902, |
| "learning_rate": 1.5956370369064733e-05, |
| "loss": 0.0157, |
| "step": 4910 |
| }, |
| { |
| "epoch": 1.1041497152156226, |
| "grad_norm": 0.1474785953760147, |
| "learning_rate": 1.5935375138176422e-05, |
| "loss": 0.0162, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.1063943211469935, |
| "grad_norm": 0.13858622312545776, |
| "learning_rate": 1.591433943626177e-05, |
| "loss": 0.0164, |
| "step": 4930 |
| }, |
| { |
| "epoch": 1.1086389270783648, |
| "grad_norm": 0.15037216246128082, |
| "learning_rate": 1.5893263406755082e-05, |
| "loss": 0.0166, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.110883533009736, |
| "grad_norm": 0.1463032215833664, |
| "learning_rate": 1.587214719336565e-05, |
| "loss": 0.0152, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.1131281389411072, |
| "grad_norm": 0.14497928321361542, |
| "learning_rate": 1.585099094007676e-05, |
| "loss": 0.0151, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.1153727448724784, |
| "grad_norm": 0.1287575364112854, |
| "learning_rate": 1.5829794791144723e-05, |
| "loss": 0.0157, |
| "step": 4970 |
| }, |
| { |
| "epoch": 1.1176173508038496, |
| "grad_norm": 0.1426306962966919, |
| "learning_rate": 1.580855889109787e-05, |
| "loss": 0.0155, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.1198619567352206, |
| "grad_norm": 0.14880262315273285, |
| "learning_rate": 1.578728338473559e-05, |
| "loss": 0.0167, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.1221065626665918, |
| "grad_norm": 0.14157725870609283, |
| "learning_rate": 1.5765968417127325e-05, |
| "loss": 0.0167, |
| "step": 5000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 13368, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 2500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.4745131970702868e+19, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|