| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 6.538796861377507, |
| "eval_steps": 500, |
| "global_step": 15000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004359197907585004, |
| "grad_norm": 11.853053092956543, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 2.8048, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.008718395815170008, |
| "grad_norm": 7.921761512756348, |
| "learning_rate": 2.5333333333333334e-06, |
| "loss": 2.7551, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.013077593722755012, |
| "grad_norm": 4.87008810043335, |
| "learning_rate": 3.866666666666667e-06, |
| "loss": 2.4567, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.017436791630340016, |
| "grad_norm": 4.188699722290039, |
| "learning_rate": 5.2e-06, |
| "loss": 2.1709, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.021795989537925022, |
| "grad_norm": 3.5124988555908203, |
| "learning_rate": 6.533333333333333e-06, |
| "loss": 1.9095, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.026155187445510025, |
| "grad_norm": 3.8245785236358643, |
| "learning_rate": 7.866666666666667e-06, |
| "loss": 1.7831, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03051438535309503, |
| "grad_norm": 2.5836427211761475, |
| "learning_rate": 9.2e-06, |
| "loss": 1.6111, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03487358326068003, |
| "grad_norm": 5.420677185058594, |
| "learning_rate": 1.0533333333333335e-05, |
| "loss": 1.4375, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03923278116826504, |
| "grad_norm": 4.477448463439941, |
| "learning_rate": 1.1866666666666668e-05, |
| "loss": 1.4068, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.043591979075850044, |
| "grad_norm": 1.6446658372879028, |
| "learning_rate": 1.32e-05, |
| "loss": 1.2661, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04795117698343505, |
| "grad_norm": 2.5110535621643066, |
| "learning_rate": 1.4533333333333335e-05, |
| "loss": 1.2488, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.05231037489102005, |
| "grad_norm": 1.5065827369689941, |
| "learning_rate": 1.586666666666667e-05, |
| "loss": 1.1814, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05666957279860506, |
| "grad_norm": 3.6299448013305664, |
| "learning_rate": 1.7199999999999998e-05, |
| "loss": 1.187, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06102877070619006, |
| "grad_norm": 1.9311683177947998, |
| "learning_rate": 1.8533333333333334e-05, |
| "loss": 1.1004, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06538796861377506, |
| "grad_norm": 0.9780636429786682, |
| "learning_rate": 1.9866666666666667e-05, |
| "loss": 1.094, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06974716652136007, |
| "grad_norm": 1.6050626039505005, |
| "learning_rate": 2.12e-05, |
| "loss": 1.1096, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.07410636442894507, |
| "grad_norm": 3.1172068119049072, |
| "learning_rate": 2.2533333333333333e-05, |
| "loss": 1.1344, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07846556233653008, |
| "grad_norm": 3.489858865737915, |
| "learning_rate": 2.3866666666666666e-05, |
| "loss": 1.0982, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08282476024411509, |
| "grad_norm": 2.0369985103607178, |
| "learning_rate": 2.5200000000000003e-05, |
| "loss": 1.1281, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.08718395815170009, |
| "grad_norm": 1.0101550817489624, |
| "learning_rate": 2.6533333333333332e-05, |
| "loss": 1.0518, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09154315605928509, |
| "grad_norm": 1.3004437685012817, |
| "learning_rate": 2.786666666666667e-05, |
| "loss": 1.0985, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0959023539668701, |
| "grad_norm": 6.565486431121826, |
| "learning_rate": 2.9199999999999998e-05, |
| "loss": 1.1108, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1002615518744551, |
| "grad_norm": 0.6993623375892639, |
| "learning_rate": 3.0533333333333335e-05, |
| "loss": 1.1143, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1046207497820401, |
| "grad_norm": 5.090579986572266, |
| "learning_rate": 3.1866666666666664e-05, |
| "loss": 1.067, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.10897994768962511, |
| "grad_norm": 1.5457755327224731, |
| "learning_rate": 3.32e-05, |
| "loss": 1.0876, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11333914559721012, |
| "grad_norm": 1.9382998943328857, |
| "learning_rate": 3.453333333333334e-05, |
| "loss": 1.0713, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.11769834350479512, |
| "grad_norm": 3.967763900756836, |
| "learning_rate": 3.586666666666667e-05, |
| "loss": 1.0995, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.12205754141238012, |
| "grad_norm": 1.1996185779571533, |
| "learning_rate": 3.72e-05, |
| "loss": 1.1083, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.12641673931996514, |
| "grad_norm": 1.581020474433899, |
| "learning_rate": 3.853333333333334e-05, |
| "loss": 1.0617, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.13077593722755013, |
| "grad_norm": 1.5426340103149414, |
| "learning_rate": 3.986666666666667e-05, |
| "loss": 1.0893, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13513513513513514, |
| "grad_norm": 1.2590006589889526, |
| "learning_rate": 4.12e-05, |
| "loss": 1.0718, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.13949433304272013, |
| "grad_norm": 0.9600476026535034, |
| "learning_rate": 4.2533333333333335e-05, |
| "loss": 1.0769, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.14385353095030515, |
| "grad_norm": 1.3560572862625122, |
| "learning_rate": 4.3866666666666665e-05, |
| "loss": 1.0867, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.14821272885789014, |
| "grad_norm": 3.1593310832977295, |
| "learning_rate": 4.52e-05, |
| "loss": 1.0845, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.15257192676547515, |
| "grad_norm": 1.2943660020828247, |
| "learning_rate": 4.653333333333334e-05, |
| "loss": 1.0656, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.15693112467306017, |
| "grad_norm": 0.8209326267242432, |
| "learning_rate": 4.7866666666666674e-05, |
| "loss": 1.0717, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.16129032258064516, |
| "grad_norm": 1.0528558492660522, |
| "learning_rate": 4.92e-05, |
| "loss": 1.0956, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.16564952048823017, |
| "grad_norm": 0.7630209922790527, |
| "learning_rate": 5.053333333333333e-05, |
| "loss": 1.1068, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.17000871839581516, |
| "grad_norm": 1.6207125186920166, |
| "learning_rate": 5.1866666666666676e-05, |
| "loss": 1.0622, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.17436791630340018, |
| "grad_norm": 0.951837956905365, |
| "learning_rate": 5.3200000000000006e-05, |
| "loss": 1.0681, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.17872711421098517, |
| "grad_norm": 1.2214558124542236, |
| "learning_rate": 5.4533333333333335e-05, |
| "loss": 1.0688, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.18308631211857018, |
| "grad_norm": 2.2449309825897217, |
| "learning_rate": 5.5866666666666665e-05, |
| "loss": 1.084, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1874455100261552, |
| "grad_norm": 1.1216390132904053, |
| "learning_rate": 5.72e-05, |
| "loss": 1.0733, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1918047079337402, |
| "grad_norm": 0.7358635067939758, |
| "learning_rate": 5.853333333333334e-05, |
| "loss": 1.0753, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1961639058413252, |
| "grad_norm": 0.8561913371086121, |
| "learning_rate": 5.9866666666666674e-05, |
| "loss": 1.0714, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2005231037489102, |
| "grad_norm": 1.0774980783462524, |
| "learning_rate": 6.12e-05, |
| "loss": 1.0618, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2048823016564952, |
| "grad_norm": 1.3355389833450317, |
| "learning_rate": 6.253333333333333e-05, |
| "loss": 1.0681, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2092414995640802, |
| "grad_norm": 0.6401084065437317, |
| "learning_rate": 6.386666666666667e-05, |
| "loss": 1.0583, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2136006974716652, |
| "grad_norm": 1.3419454097747803, |
| "learning_rate": 6.52e-05, |
| "loss": 1.074, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.21795989537925023, |
| "grad_norm": 0.993840217590332, |
| "learning_rate": 6.653333333333334e-05, |
| "loss": 1.0666, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.22231909328683522, |
| "grad_norm": 1.9545044898986816, |
| "learning_rate": 6.786666666666667e-05, |
| "loss": 1.0842, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.22667829119442023, |
| "grad_norm": 0.7455446124076843, |
| "learning_rate": 6.92e-05, |
| "loss": 1.057, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.23103748910200522, |
| "grad_norm": 1.0625560283660889, |
| "learning_rate": 7.053333333333334e-05, |
| "loss": 1.0793, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.23539668700959024, |
| "grad_norm": 1.8029131889343262, |
| "learning_rate": 7.186666666666667e-05, |
| "loss": 1.0806, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.23975588491717523, |
| "grad_norm": 0.7211843729019165, |
| "learning_rate": 7.32e-05, |
| "loss": 1.0851, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.24411508282476024, |
| "grad_norm": 1.2813831567764282, |
| "learning_rate": 7.453333333333333e-05, |
| "loss": 1.0613, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.24847428073234526, |
| "grad_norm": 1.113389015197754, |
| "learning_rate": 7.586666666666668e-05, |
| "loss": 1.0536, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.2528334786399303, |
| "grad_norm": 1.0559017658233643, |
| "learning_rate": 7.72e-05, |
| "loss": 1.0626, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.25719267654751526, |
| "grad_norm": 2.7463810443878174, |
| "learning_rate": 7.853333333333334e-05, |
| "loss": 1.0652, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.26155187445510025, |
| "grad_norm": 1.103918433189392, |
| "learning_rate": 7.986666666666667e-05, |
| "loss": 1.0634, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.26591107236268524, |
| "grad_norm": 1.6153513193130493, |
| "learning_rate": 8.120000000000001e-05, |
| "loss": 1.0869, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2702702702702703, |
| "grad_norm": 1.8530434370040894, |
| "learning_rate": 8.253333333333334e-05, |
| "loss": 1.0693, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2746294681778553, |
| "grad_norm": 0.752463161945343, |
| "learning_rate": 8.386666666666667e-05, |
| "loss": 1.0919, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.27898866608544026, |
| "grad_norm": 0.6594322323799133, |
| "learning_rate": 8.52e-05, |
| "loss": 1.0756, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2833478639930253, |
| "grad_norm": 0.9615456461906433, |
| "learning_rate": 8.653333333333333e-05, |
| "loss": 1.0839, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2877070619006103, |
| "grad_norm": 0.9804999828338623, |
| "learning_rate": 8.786666666666667e-05, |
| "loss": 1.0633, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2920662598081953, |
| "grad_norm": 1.1694570779800415, |
| "learning_rate": 8.92e-05, |
| "loss": 1.0849, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.29642545771578027, |
| "grad_norm": 1.0795670747756958, |
| "learning_rate": 9.053333333333334e-05, |
| "loss": 1.0552, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3007846556233653, |
| "grad_norm": 1.1118414402008057, |
| "learning_rate": 9.186666666666667e-05, |
| "loss": 1.0753, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3051438535309503, |
| "grad_norm": 1.0514802932739258, |
| "learning_rate": 9.320000000000002e-05, |
| "loss": 1.0641, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3095030514385353, |
| "grad_norm": 0.9771641492843628, |
| "learning_rate": 9.453333333333335e-05, |
| "loss": 1.0756, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.31386224934612034, |
| "grad_norm": 2.385375499725342, |
| "learning_rate": 9.586666666666667e-05, |
| "loss": 1.0475, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.3182214472537053, |
| "grad_norm": 1.542068600654602, |
| "learning_rate": 9.72e-05, |
| "loss": 1.0494, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3225806451612903, |
| "grad_norm": 2.3379392623901367, |
| "learning_rate": 9.853333333333333e-05, |
| "loss": 1.0553, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.3269398430688753, |
| "grad_norm": 0.5032880902290344, |
| "learning_rate": 9.986666666666668e-05, |
| "loss": 1.0873, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.33129904097646035, |
| "grad_norm": 1.1064118146896362, |
| "learning_rate": 9.999990157738453e-05, |
| "loss": 1.0807, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.33565823888404533, |
| "grad_norm": 0.9185916781425476, |
| "learning_rate": 9.999956135155687e-05, |
| "loss": 1.0868, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3400174367916303, |
| "grad_norm": 1.1694447994232178, |
| "learning_rate": 9.99989781090763e-05, |
| "loss": 1.098, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.34437663469921537, |
| "grad_norm": 0.6519765257835388, |
| "learning_rate": 9.999815185277755e-05, |
| "loss": 1.0567, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.34873583260680036, |
| "grad_norm": 1.022257924079895, |
| "learning_rate": 9.999708258667652e-05, |
| "loss": 1.0396, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.35309503051438534, |
| "grad_norm": 0.7446913719177246, |
| "learning_rate": 9.999577031597029e-05, |
| "loss": 1.0574, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.35745422842197033, |
| "grad_norm": 0.6779941916465759, |
| "learning_rate": 9.999421504703696e-05, |
| "loss": 1.0582, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3618134263295554, |
| "grad_norm": 0.8097116351127625, |
| "learning_rate": 9.999241678743574e-05, |
| "loss": 1.0507, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.36617262423714037, |
| "grad_norm": 1.2719435691833496, |
| "learning_rate": 9.999037554590683e-05, |
| "loss": 1.0814, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.37053182214472535, |
| "grad_norm": 1.1539487838745117, |
| "learning_rate": 9.998809133237143e-05, |
| "loss": 1.0376, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.3748910200523104, |
| "grad_norm": 3.295795440673828, |
| "learning_rate": 9.998556415793169e-05, |
| "loss": 1.0528, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.3792502179598954, |
| "grad_norm": 0.8902769088745117, |
| "learning_rate": 9.998279403487062e-05, |
| "loss": 1.0357, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.3836094158674804, |
| "grad_norm": 0.5429982542991638, |
| "learning_rate": 9.997978097665205e-05, |
| "loss": 1.0108, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.38796861377506536, |
| "grad_norm": 0.8729974627494812, |
| "learning_rate": 9.99765249979206e-05, |
| "loss": 1.0723, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.3923278116826504, |
| "grad_norm": 1.0210973024368286, |
| "learning_rate": 9.997302611450154e-05, |
| "loss": 1.0844, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3966870095902354, |
| "grad_norm": 0.8282007575035095, |
| "learning_rate": 9.996928434340073e-05, |
| "loss": 1.0487, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.4010462074978204, |
| "grad_norm": 0.6480956077575684, |
| "learning_rate": 9.996529970280462e-05, |
| "loss": 1.0471, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.40540540540540543, |
| "grad_norm": 0.5509655475616455, |
| "learning_rate": 9.996107221208004e-05, |
| "loss": 1.0526, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.4097646033129904, |
| "grad_norm": 1.1537059545516968, |
| "learning_rate": 9.995660189177419e-05, |
| "loss": 1.0561, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4141238012205754, |
| "grad_norm": 2.260296583175659, |
| "learning_rate": 9.995188876361451e-05, |
| "loss": 1.0208, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.4184829991281604, |
| "grad_norm": 0.829742431640625, |
| "learning_rate": 9.994693285050857e-05, |
| "loss": 1.0498, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.42284219703574544, |
| "grad_norm": 0.6473196148872375, |
| "learning_rate": 9.994173417654395e-05, |
| "loss": 1.0367, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.4272013949433304, |
| "grad_norm": 0.6572287678718567, |
| "learning_rate": 9.993629276698821e-05, |
| "loss": 1.0416, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.4315605928509154, |
| "grad_norm": 0.47851496934890747, |
| "learning_rate": 9.993060864828858e-05, |
| "loss": 1.0464, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.43591979075850046, |
| "grad_norm": 0.43127620220184326, |
| "learning_rate": 9.992468184807206e-05, |
| "loss": 1.0678, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.44027898866608545, |
| "grad_norm": 0.5899858474731445, |
| "learning_rate": 9.991851239514511e-05, |
| "loss": 1.0726, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.44463818657367044, |
| "grad_norm": 0.6047391295433044, |
| "learning_rate": 9.991210031949359e-05, |
| "loss": 1.0497, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.4489973844812554, |
| "grad_norm": 0.5648536682128906, |
| "learning_rate": 9.990544565228259e-05, |
| "loss": 1.0402, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.45335658238884047, |
| "grad_norm": 0.5038257241249084, |
| "learning_rate": 9.989854842585631e-05, |
| "loss": 1.0459, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.45771578029642546, |
| "grad_norm": 0.5015464425086975, |
| "learning_rate": 9.989140867373783e-05, |
| "loss": 1.0453, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.46207497820401044, |
| "grad_norm": 0.6012107729911804, |
| "learning_rate": 9.988402643062907e-05, |
| "loss": 1.0527, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.4664341761115955, |
| "grad_norm": 0.8822391033172607, |
| "learning_rate": 9.987640173241046e-05, |
| "loss": 1.0598, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4707933740191805, |
| "grad_norm": 0.8211294412612915, |
| "learning_rate": 9.986853461614093e-05, |
| "loss": 1.0355, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.47515257192676547, |
| "grad_norm": 0.5536686182022095, |
| "learning_rate": 9.986042512005763e-05, |
| "loss": 1.0622, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.47951176983435045, |
| "grad_norm": 0.7554659247398376, |
| "learning_rate": 9.985207328357573e-05, |
| "loss": 1.038, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4838709677419355, |
| "grad_norm": 0.6661390066146851, |
| "learning_rate": 9.984347914728829e-05, |
| "loss": 1.0291, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.4882301656495205, |
| "grad_norm": 0.5693694353103638, |
| "learning_rate": 9.983464275296605e-05, |
| "loss": 1.0499, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.4925893635571055, |
| "grad_norm": 0.3903997838497162, |
| "learning_rate": 9.982556414355724e-05, |
| "loss": 1.0945, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.4969485614646905, |
| "grad_norm": 0.534357488155365, |
| "learning_rate": 9.981624336318726e-05, |
| "loss": 1.0483, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5013077593722755, |
| "grad_norm": 0.7887455224990845, |
| "learning_rate": 9.980668045715864e-05, |
| "loss": 1.0626, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.5056669572798606, |
| "grad_norm": 1.018442988395691, |
| "learning_rate": 9.979687547195066e-05, |
| "loss": 1.0215, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.5100261551874455, |
| "grad_norm": 0.8873037099838257, |
| "learning_rate": 9.978682845521927e-05, |
| "loss": 1.082, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.5143853530950305, |
| "grad_norm": 0.523079514503479, |
| "learning_rate": 9.977653945579673e-05, |
| "loss": 1.0835, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5187445510026155, |
| "grad_norm": 0.5072969198226929, |
| "learning_rate": 9.976600852369144e-05, |
| "loss": 1.0621, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.5231037489102005, |
| "grad_norm": 0.9282866716384888, |
| "learning_rate": 9.975523571008769e-05, |
| "loss": 1.0583, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5274629468177855, |
| "grad_norm": 1.1153844594955444, |
| "learning_rate": 9.97442210673454e-05, |
| "loss": 1.0592, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.5318221447253705, |
| "grad_norm": 0.5975160598754883, |
| "learning_rate": 9.973296464899988e-05, |
| "loss": 1.0872, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.5361813426329556, |
| "grad_norm": 0.6239467859268188, |
| "learning_rate": 9.972146650976154e-05, |
| "loss": 1.0563, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.5405405405405406, |
| "grad_norm": 0.4678373336791992, |
| "learning_rate": 9.970972670551566e-05, |
| "loss": 1.0855, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.5448997384481256, |
| "grad_norm": 0.6621468663215637, |
| "learning_rate": 9.969774529332212e-05, |
| "loss": 1.0673, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.5492589363557105, |
| "grad_norm": 0.6079438328742981, |
| "learning_rate": 9.968552233141504e-05, |
| "loss": 1.0705, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.5536181342632955, |
| "grad_norm": 0.57035231590271, |
| "learning_rate": 9.967305787920264e-05, |
| "loss": 1.0736, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.5579773321708805, |
| "grad_norm": 0.7185502052307129, |
| "learning_rate": 9.966035199726684e-05, |
| "loss": 1.0673, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.5623365300784655, |
| "grad_norm": 0.6128619909286499, |
| "learning_rate": 9.9647404747363e-05, |
| "loss": 1.0699, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.5666957279860506, |
| "grad_norm": 0.5556525588035583, |
| "learning_rate": 9.96342161924196e-05, |
| "loss": 1.0554, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5710549258936356, |
| "grad_norm": 0.6182255148887634, |
| "learning_rate": 9.962078639653797e-05, |
| "loss": 1.0799, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5754141238012206, |
| "grad_norm": 0.7383370399475098, |
| "learning_rate": 9.960711542499202e-05, |
| "loss": 1.0533, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.5797733217088056, |
| "grad_norm": 0.4465934932231903, |
| "learning_rate": 9.959320334422772e-05, |
| "loss": 1.059, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.5841325196163906, |
| "grad_norm": 1.1247705221176147, |
| "learning_rate": 9.957905022186309e-05, |
| "loss": 1.0402, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.5884917175239756, |
| "grad_norm": 0.7212786674499512, |
| "learning_rate": 9.956465612668757e-05, |
| "loss": 1.0318, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.5928509154315605, |
| "grad_norm": 0.6343246698379517, |
| "learning_rate": 9.95500211286619e-05, |
| "loss": 1.0641, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.5972101133391456, |
| "grad_norm": 0.8640972971916199, |
| "learning_rate": 9.953514529891763e-05, |
| "loss": 1.0435, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.6015693112467306, |
| "grad_norm": 1.0832066535949707, |
| "learning_rate": 9.952002870975693e-05, |
| "loss": 1.0402, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.6059285091543156, |
| "grad_norm": 0.8250817060470581, |
| "learning_rate": 9.950467143465207e-05, |
| "loss": 1.0501, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.6102877070619006, |
| "grad_norm": 0.8554821014404297, |
| "learning_rate": 9.94890735482452e-05, |
| "loss": 1.0527, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6146469049694856, |
| "grad_norm": 0.6301167011260986, |
| "learning_rate": 9.947323512634788e-05, |
| "loss": 1.069, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.6190061028770706, |
| "grad_norm": 0.5204041600227356, |
| "learning_rate": 9.945715624594081e-05, |
| "loss": 1.0728, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.6233653007846556, |
| "grad_norm": 0.8133730888366699, |
| "learning_rate": 9.944083698517339e-05, |
| "loss": 1.0364, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.6277244986922407, |
| "grad_norm": 1.3038055896759033, |
| "learning_rate": 9.942427742336334e-05, |
| "loss": 1.0204, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.6320836965998257, |
| "grad_norm": 0.687967836856842, |
| "learning_rate": 9.940747764099638e-05, |
| "loss": 1.032, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.6364428945074107, |
| "grad_norm": 0.5078476667404175, |
| "learning_rate": 9.939043771972574e-05, |
| "loss": 1.0334, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.6408020924149956, |
| "grad_norm": 0.5568360090255737, |
| "learning_rate": 9.937315774237186e-05, |
| "loss": 1.0348, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.6451612903225806, |
| "grad_norm": 0.6646256446838379, |
| "learning_rate": 9.93556377929219e-05, |
| "loss": 1.0356, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.6495204882301656, |
| "grad_norm": 0.6583265662193298, |
| "learning_rate": 9.933787795652942e-05, |
| "loss": 1.0468, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.6538796861377506, |
| "grad_norm": 0.9695225358009338, |
| "learning_rate": 9.931987831951386e-05, |
| "loss": 1.0574, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6582388840453357, |
| "grad_norm": 0.5819401741027832, |
| "learning_rate": 9.930163896936027e-05, |
| "loss": 1.0484, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.6625980819529207, |
| "grad_norm": 0.7117506265640259, |
| "learning_rate": 9.92831599947187e-05, |
| "loss": 1.0333, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.6669572798605057, |
| "grad_norm": 1.2941336631774902, |
| "learning_rate": 9.926444148540393e-05, |
| "loss": 1.0536, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.6713164777680907, |
| "grad_norm": 0.5326636433601379, |
| "learning_rate": 9.924548353239495e-05, |
| "loss": 1.0346, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.6756756756756757, |
| "grad_norm": 0.4321506917476654, |
| "learning_rate": 9.922628622783451e-05, |
| "loss": 1.0311, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.6800348735832606, |
| "grad_norm": 0.7744588851928711, |
| "learning_rate": 9.920684966502878e-05, |
| "loss": 1.0428, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.6843940714908456, |
| "grad_norm": 0.456521600484848, |
| "learning_rate": 9.918717393844669e-05, |
| "loss": 1.0725, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.6887532693984307, |
| "grad_norm": 1.024055004119873, |
| "learning_rate": 9.916725914371969e-05, |
| "loss": 1.0228, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.6931124673060157, |
| "grad_norm": 0.7041059732437134, |
| "learning_rate": 9.914710537764117e-05, |
| "loss": 1.056, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.6974716652136007, |
| "grad_norm": 0.5206664800643921, |
| "learning_rate": 9.912671273816601e-05, |
| "loss": 1.0437, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7018308631211857, |
| "grad_norm": 0.5767226815223694, |
| "learning_rate": 9.910608132441008e-05, |
| "loss": 1.0398, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.7061900610287707, |
| "grad_norm": 1.048291802406311, |
| "learning_rate": 9.908521123664981e-05, |
| "loss": 1.0063, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.7105492589363557, |
| "grad_norm": 0.4653307795524597, |
| "learning_rate": 9.906410257632168e-05, |
| "loss": 1.0541, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.7149084568439407, |
| "grad_norm": 0.6922876834869385, |
| "learning_rate": 9.904275544602169e-05, |
| "loss": 1.0439, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.7192676547515258, |
| "grad_norm": 0.4627837538719177, |
| "learning_rate": 9.902116994950493e-05, |
| "loss": 1.0504, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7236268526591108, |
| "grad_norm": 1.1777377128601074, |
| "learning_rate": 9.899934619168501e-05, |
| "loss": 1.0402, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.7279860505666957, |
| "grad_norm": 0.6930601000785828, |
| "learning_rate": 9.89772842786336e-05, |
| "loss": 1.0721, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.7323452484742807, |
| "grad_norm": 1.1171027421951294, |
| "learning_rate": 9.895498431757989e-05, |
| "loss": 1.062, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.7367044463818657, |
| "grad_norm": 0.7950462102890015, |
| "learning_rate": 9.893244641691006e-05, |
| "loss": 1.0624, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.7410636442894507, |
| "grad_norm": 1.396531581878662, |
| "learning_rate": 9.890967068616677e-05, |
| "loss": 1.0489, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7454228421970357, |
| "grad_norm": 0.8184282183647156, |
| "learning_rate": 9.888665723604864e-05, |
| "loss": 1.05, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.7497820401046208, |
| "grad_norm": 1.0491915941238403, |
| "learning_rate": 9.886340617840968e-05, |
| "loss": 1.0579, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.7541412380122058, |
| "grad_norm": 0.5465502142906189, |
| "learning_rate": 9.883991762625876e-05, |
| "loss": 1.0527, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.7585004359197908, |
| "grad_norm": 0.7717880010604858, |
| "learning_rate": 9.881619169375908e-05, |
| "loss": 1.0409, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.7628596338273758, |
| "grad_norm": 0.7524123191833496, |
| "learning_rate": 9.879222849622758e-05, |
| "loss": 1.0472, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.7672188317349607, |
| "grad_norm": 0.7540025115013123, |
| "learning_rate": 9.876802815013439e-05, |
| "loss": 1.0535, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.7715780296425457, |
| "grad_norm": 0.5446664094924927, |
| "learning_rate": 9.87435907731023e-05, |
| "loss": 1.0344, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.7759372275501307, |
| "grad_norm": 0.5577282309532166, |
| "learning_rate": 9.871891648390614e-05, |
| "loss": 1.0484, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.7802964254577158, |
| "grad_norm": 0.6824474930763245, |
| "learning_rate": 9.869400540247223e-05, |
| "loss": 1.0257, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.7846556233653008, |
| "grad_norm": 1.3244407176971436, |
| "learning_rate": 9.866885764987776e-05, |
| "loss": 1.0293, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7890148212728858, |
| "grad_norm": 0.7491902709007263, |
| "learning_rate": 9.86434733483503e-05, |
| "loss": 1.0338, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.7933740191804708, |
| "grad_norm": 0.7118448615074158, |
| "learning_rate": 9.861785262126705e-05, |
| "loss": 1.0245, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.7977332170880558, |
| "grad_norm": 0.7211303114891052, |
| "learning_rate": 9.85919955931544e-05, |
| "loss": 1.0225, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.8020924149956408, |
| "grad_norm": 0.5354200601577759, |
| "learning_rate": 9.856590238968721e-05, |
| "loss": 1.0291, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.8064516129032258, |
| "grad_norm": 0.42781445384025574, |
| "learning_rate": 9.853957313768824e-05, |
| "loss": 1.0356, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.8108108108108109, |
| "grad_norm": 0.6619517207145691, |
| "learning_rate": 9.851300796512755e-05, |
| "loss": 1.0363, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8151700087183958, |
| "grad_norm": 0.9382469058036804, |
| "learning_rate": 9.848620700112188e-05, |
| "loss": 1.01, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.8195292066259808, |
| "grad_norm": 0.9875907301902771, |
| "learning_rate": 9.845917037593396e-05, |
| "loss": 1.048, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.8238884045335658, |
| "grad_norm": 0.6974698305130005, |
| "learning_rate": 9.843189822097196e-05, |
| "loss": 1.0514, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.8282476024411508, |
| "grad_norm": 0.5646869540214539, |
| "learning_rate": 9.84043906687888e-05, |
| "loss": 1.0436, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.8326068003487358, |
| "grad_norm": 0.5306071639060974, |
| "learning_rate": 9.837664785308149e-05, |
| "loss": 1.0578, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.8369659982563208, |
| "grad_norm": 0.4362487494945526, |
| "learning_rate": 9.834866990869059e-05, |
| "loss": 1.0183, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.8413251961639059, |
| "grad_norm": 0.5740931630134583, |
| "learning_rate": 9.832045697159938e-05, |
| "loss": 1.0421, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.8456843940714909, |
| "grad_norm": 0.37821829319000244, |
| "learning_rate": 9.829200917893334e-05, |
| "loss": 1.0275, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.8500435919790759, |
| "grad_norm": 0.567612886428833, |
| "learning_rate": 9.826332666895944e-05, |
| "loss": 1.0372, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.8544027898866609, |
| "grad_norm": 0.512275755405426, |
| "learning_rate": 9.823440958108545e-05, |
| "loss": 1.0482, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.8587619877942458, |
| "grad_norm": 0.7865485548973083, |
| "learning_rate": 9.820525805585927e-05, |
| "loss": 1.0437, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.8631211857018308, |
| "grad_norm": 0.6144497394561768, |
| "learning_rate": 9.81758722349683e-05, |
| "loss": 1.0411, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.8674803836094158, |
| "grad_norm": 0.7085159420967102, |
| "learning_rate": 9.814625226123862e-05, |
| "loss": 1.0431, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.8718395815170009, |
| "grad_norm": 0.5472831130027771, |
| "learning_rate": 9.811639827863449e-05, |
| "loss": 1.047, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8761987794245859, |
| "grad_norm": 0.6783573627471924, |
| "learning_rate": 9.808631043225741e-05, |
| "loss": 1.0448, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.8805579773321709, |
| "grad_norm": 0.7649420499801636, |
| "learning_rate": 9.805598886834567e-05, |
| "loss": 1.0241, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.8849171752397559, |
| "grad_norm": 0.5474071502685547, |
| "learning_rate": 9.802543373427344e-05, |
| "loss": 1.035, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.8892763731473409, |
| "grad_norm": 0.40244612097740173, |
| "learning_rate": 9.799464517855018e-05, |
| "loss": 1.0289, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.8936355710549259, |
| "grad_norm": 0.5829120874404907, |
| "learning_rate": 9.79636233508198e-05, |
| "loss": 1.0308, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.8979947689625108, |
| "grad_norm": 0.6150997877120972, |
| "learning_rate": 9.793236840186005e-05, |
| "loss": 1.0344, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.902353966870096, |
| "grad_norm": 0.6614237427711487, |
| "learning_rate": 9.790088048358175e-05, |
| "loss": 1.0162, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.9067131647776809, |
| "grad_norm": 0.5127846598625183, |
| "learning_rate": 9.786915974902798e-05, |
| "loss": 1.0383, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.9110723626852659, |
| "grad_norm": 0.6490142941474915, |
| "learning_rate": 9.783720635237343e-05, |
| "loss": 1.0134, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.9154315605928509, |
| "grad_norm": 0.6402510404586792, |
| "learning_rate": 9.780502044892362e-05, |
| "loss": 1.0332, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9197907585004359, |
| "grad_norm": 0.4572957754135132, |
| "learning_rate": 9.777260219511415e-05, |
| "loss": 1.0204, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.9241499564080209, |
| "grad_norm": 1.4024547338485718, |
| "learning_rate": 9.773995174850989e-05, |
| "loss": 1.0246, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.9285091543156059, |
| "grad_norm": 0.765164852142334, |
| "learning_rate": 9.770706926780428e-05, |
| "loss": 1.0217, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.932868352223191, |
| "grad_norm": 0.8015472292900085, |
| "learning_rate": 9.767395491281855e-05, |
| "loss": 1.0586, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.937227550130776, |
| "grad_norm": 0.7269527912139893, |
| "learning_rate": 9.764060884450086e-05, |
| "loss": 1.0121, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.941586748038361, |
| "grad_norm": 0.7793417572975159, |
| "learning_rate": 9.76070312249257e-05, |
| "loss": 1.0036, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.9459459459459459, |
| "grad_norm": 0.6566328406333923, |
| "learning_rate": 9.757322221729283e-05, |
| "loss": 1.024, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.9503051438535309, |
| "grad_norm": 0.7390000224113464, |
| "learning_rate": 9.753918198592682e-05, |
| "loss": 1.0305, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.9546643417611159, |
| "grad_norm": 0.6735124588012695, |
| "learning_rate": 9.750491069627593e-05, |
| "loss": 1.0197, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.9590235396687009, |
| "grad_norm": 1.4354088306427002, |
| "learning_rate": 9.747040851491149e-05, |
| "loss": 1.0231, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.963382737576286, |
| "grad_norm": 0.5722385048866272, |
| "learning_rate": 9.743567560952711e-05, |
| "loss": 1.008, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.967741935483871, |
| "grad_norm": 1.234014868736267, |
| "learning_rate": 9.740071214893773e-05, |
| "loss": 1.0208, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.972101133391456, |
| "grad_norm": 0.6652539372444153, |
| "learning_rate": 9.736551830307892e-05, |
| "loss": 1.0306, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.976460331299041, |
| "grad_norm": 0.5687094330787659, |
| "learning_rate": 9.733009424300597e-05, |
| "loss": 1.0456, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.980819529206626, |
| "grad_norm": 1.096685528755188, |
| "learning_rate": 9.729444014089314e-05, |
| "loss": 1.0357, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.985178727114211, |
| "grad_norm": 0.6169744729995728, |
| "learning_rate": 9.725855617003275e-05, |
| "loss": 1.0044, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.9895379250217959, |
| "grad_norm": 0.7805534601211548, |
| "learning_rate": 9.72224425048344e-05, |
| "loss": 1.0375, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.993897122929381, |
| "grad_norm": 1.3868515491485596, |
| "learning_rate": 9.718609932082405e-05, |
| "loss": 1.0149, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.998256320836966, |
| "grad_norm": 0.625151515007019, |
| "learning_rate": 9.714952679464323e-05, |
| "loss": 0.9914, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.002615518744551, |
| "grad_norm": 0.7442333102226257, |
| "learning_rate": 9.711272510404816e-05, |
| "loss": 1.0047, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.0069747166521361, |
| "grad_norm": 0.561114490032196, |
| "learning_rate": 9.70756944279089e-05, |
| "loss": 1.0157, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.011333914559721, |
| "grad_norm": 0.6227409839630127, |
| "learning_rate": 9.70384349462084e-05, |
| "loss": 1.0276, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.015693112467306, |
| "grad_norm": 1.2291237115859985, |
| "learning_rate": 9.700094684004182e-05, |
| "loss": 0.9942, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.020052310374891, |
| "grad_norm": 1.3991217613220215, |
| "learning_rate": 9.696323029161535e-05, |
| "loss": 1.0272, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.024411508282476, |
| "grad_norm": 1.0560461282730103, |
| "learning_rate": 9.692528548424567e-05, |
| "loss": 1.0041, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.028770706190061, |
| "grad_norm": 1.0611501932144165, |
| "learning_rate": 9.688711260235872e-05, |
| "loss": 0.9916, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.033129904097646, |
| "grad_norm": 0.9067592620849609, |
| "learning_rate": 9.684871183148912e-05, |
| "loss": 1.0055, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.037489102005231, |
| "grad_norm": 1.063376784324646, |
| "learning_rate": 9.681008335827898e-05, |
| "loss": 0.9674, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.041848299912816, |
| "grad_norm": 2.4049572944641113, |
| "learning_rate": 9.677122737047724e-05, |
| "loss": 0.9767, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.046207497820401, |
| "grad_norm": 1.297662377357483, |
| "learning_rate": 9.673214405693857e-05, |
| "loss": 0.919, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.050566695727986, |
| "grad_norm": 0.7325993180274963, |
| "learning_rate": 9.669283360762258e-05, |
| "loss": 0.9641, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.054925893635571, |
| "grad_norm": 1.0571956634521484, |
| "learning_rate": 9.66532962135928e-05, |
| "loss": 0.9962, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.059285091543156, |
| "grad_norm": 1.3534421920776367, |
| "learning_rate": 9.661353206701582e-05, |
| "loss": 0.9791, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.063644289450741, |
| "grad_norm": 1.4913572072982788, |
| "learning_rate": 9.657354136116035e-05, |
| "loss": 0.9379, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.0680034873583262, |
| "grad_norm": 2.4377195835113525, |
| "learning_rate": 9.653332429039625e-05, |
| "loss": 0.9346, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.0723626852659112, |
| "grad_norm": 1.0011940002441406, |
| "learning_rate": 9.649288105019356e-05, |
| "loss": 0.9223, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.0767218831734962, |
| "grad_norm": 1.409762978553772, |
| "learning_rate": 9.645221183712165e-05, |
| "loss": 0.9075, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.0810810810810811, |
| "grad_norm": 1.3468458652496338, |
| "learning_rate": 9.641131684884817e-05, |
| "loss": 0.9658, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.0854402789886661, |
| "grad_norm": 1.6132701635360718, |
| "learning_rate": 9.637019628413813e-05, |
| "loss": 0.9184, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.0897994768962511, |
| "grad_norm": 2.994033098220825, |
| "learning_rate": 9.632885034285291e-05, |
| "loss": 0.934, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.094158674803836, |
| "grad_norm": 1.964921236038208, |
| "learning_rate": 9.628727922594931e-05, |
| "loss": 0.8986, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.098517872711421, |
| "grad_norm": 1.2736964225769043, |
| "learning_rate": 9.624548313547862e-05, |
| "loss": 0.8723, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.102877070619006, |
| "grad_norm": 1.9553364515304565, |
| "learning_rate": 9.620346227458547e-05, |
| "loss": 0.8539, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.107236268526591, |
| "grad_norm": 1.969037652015686, |
| "learning_rate": 9.616121684750712e-05, |
| "loss": 0.8377, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.111595466434176, |
| "grad_norm": 2.0418758392333984, |
| "learning_rate": 9.611874705957215e-05, |
| "loss": 0.8172, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.115954664341761, |
| "grad_norm": 2.196484327316284, |
| "learning_rate": 9.607605311719972e-05, |
| "loss": 0.7764, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.120313862249346, |
| "grad_norm": 2.5240135192871094, |
| "learning_rate": 9.603313522789841e-05, |
| "loss": 0.7373, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.124673060156931, |
| "grad_norm": 3.0054984092712402, |
| "learning_rate": 9.598999360026529e-05, |
| "loss": 0.6584, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.129032258064516, |
| "grad_norm": 3.442847490310669, |
| "learning_rate": 9.59466284439849e-05, |
| "loss": 0.669, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.1333914559721012, |
| "grad_norm": 2.902653217315674, |
| "learning_rate": 9.590303996982815e-05, |
| "loss": 0.6999, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.1377506538796862, |
| "grad_norm": 2.5193164348602295, |
| "learning_rate": 9.585922838965145e-05, |
| "loss": 0.6424, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.1421098517872712, |
| "grad_norm": 3.2627856731414795, |
| "learning_rate": 9.581519391639549e-05, |
| "loss": 0.5839, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.1464690496948562, |
| "grad_norm": 4.419332504272461, |
| "learning_rate": 9.577093676408439e-05, |
| "loss": 0.5886, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.1508282476024412, |
| "grad_norm": 3.463974952697754, |
| "learning_rate": 9.572645714782453e-05, |
| "loss": 0.4981, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.1551874455100262, |
| "grad_norm": 3.361687183380127, |
| "learning_rate": 9.568175528380354e-05, |
| "loss": 0.5007, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.1595466434176112, |
| "grad_norm": 2.817034959793091, |
| "learning_rate": 9.56368313892893e-05, |
| "loss": 0.5828, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.1639058413251961, |
| "grad_norm": 5.572615623474121, |
| "learning_rate": 9.55916856826288e-05, |
| "loss": 0.5255, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.1682650392327811, |
| "grad_norm": 2.425114154815674, |
| "learning_rate": 9.554631838324713e-05, |
| "loss": 0.549, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.1726242371403661, |
| "grad_norm": 3.455268621444702, |
| "learning_rate": 9.55007297116464e-05, |
| "loss": 0.4834, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.176983435047951, |
| "grad_norm": 2.3823935985565186, |
| "learning_rate": 9.545491988940472e-05, |
| "loss": 0.4525, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.181342632955536, |
| "grad_norm": 3.978440284729004, |
| "learning_rate": 9.540888913917501e-05, |
| "loss": 0.4156, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.1857018308631213, |
| "grad_norm": 3.4890248775482178, |
| "learning_rate": 9.536263768468401e-05, |
| "loss": 0.4701, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.1900610287707063, |
| "grad_norm": 4.7818379402160645, |
| "learning_rate": 9.531616575073117e-05, |
| "loss": 0.4442, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.1944202266782913, |
| "grad_norm": 3.9928464889526367, |
| "learning_rate": 9.526947356318754e-05, |
| "loss": 0.447, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.1987794245858763, |
| "grad_norm": 3.704075336456299, |
| "learning_rate": 9.52225613489947e-05, |
| "loss": 0.4156, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.2031386224934613, |
| "grad_norm": 2.7910592555999756, |
| "learning_rate": 9.517542933616365e-05, |
| "loss": 0.3874, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.2074978204010463, |
| "grad_norm": 6.242345809936523, |
| "learning_rate": 9.512807775377366e-05, |
| "loss": 0.3684, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.2118570183086312, |
| "grad_norm": 4.7204766273498535, |
| "learning_rate": 9.508050683197121e-05, |
| "loss": 0.3744, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.2162162162162162, |
| "grad_norm": 6.195404052734375, |
| "learning_rate": 9.503271680196888e-05, |
| "loss": 0.3408, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.2205754141238012, |
| "grad_norm": 8.634724617004395, |
| "learning_rate": 9.498470789604413e-05, |
| "loss": 0.3721, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.2249346120313862, |
| "grad_norm": 4.196957111358643, |
| "learning_rate": 9.49364803475383e-05, |
| "loss": 0.4259, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.2292938099389712, |
| "grad_norm": 3.526580810546875, |
| "learning_rate": 9.48880343908554e-05, |
| "loss": 0.352, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.2336530078465562, |
| "grad_norm": 4.0493316650390625, |
| "learning_rate": 9.4839370261461e-05, |
| "loss": 0.3469, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.2380122057541412, |
| "grad_norm": 6.060046672821045, |
| "learning_rate": 9.479048819588098e-05, |
| "loss": 0.3126, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.2423714036617262, |
| "grad_norm": 3.5340590476989746, |
| "learning_rate": 9.474138843170063e-05, |
| "loss": 0.331, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.2467306015693111, |
| "grad_norm": 2.743643045425415, |
| "learning_rate": 9.46920712075632e-05, |
| "loss": 0.2869, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.2510897994768961, |
| "grad_norm": 7.408202171325684, |
| "learning_rate": 9.464253676316893e-05, |
| "loss": 0.3174, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.2554489973844811, |
| "grad_norm": 8.131254196166992, |
| "learning_rate": 9.459278533927384e-05, |
| "loss": 0.2857, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.2598081952920663, |
| "grad_norm": 8.597091674804688, |
| "learning_rate": 9.454281717768854e-05, |
| "loss": 0.2979, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.2641673931996513, |
| "grad_norm": 5.740150451660156, |
| "learning_rate": 9.449263252127708e-05, |
| "loss": 0.2927, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.2685265911072363, |
| "grad_norm": 5.028838634490967, |
| "learning_rate": 9.444223161395573e-05, |
| "loss": 0.2927, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.2728857890148213, |
| "grad_norm": 6.9507904052734375, |
| "learning_rate": 9.439161470069184e-05, |
| "loss": 0.3225, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.2772449869224063, |
| "grad_norm": 3.765681266784668, |
| "learning_rate": 9.43407820275026e-05, |
| "loss": 0.309, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.2816041848299913, |
| "grad_norm": 4.51765251159668, |
| "learning_rate": 9.428973384145396e-05, |
| "loss": 0.334, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.2859633827375763, |
| "grad_norm": 6.146585464477539, |
| "learning_rate": 9.423847039065922e-05, |
| "loss": 0.2429, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.2903225806451613, |
| "grad_norm": 4.963317394256592, |
| "learning_rate": 9.418699192427805e-05, |
| "loss": 0.269, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.2946817785527462, |
| "grad_norm": 6.3673996925354, |
| "learning_rate": 9.41352986925151e-05, |
| "loss": 0.2482, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.2990409764603312, |
| "grad_norm": 6.359657287597656, |
| "learning_rate": 9.408339094661895e-05, |
| "loss": 0.2639, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.3034001743679164, |
| "grad_norm": 2.6293511390686035, |
| "learning_rate": 9.40312689388807e-05, |
| "loss": 0.2487, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.3077593722755014, |
| "grad_norm": 4.45819091796875, |
| "learning_rate": 9.397893292263292e-05, |
| "loss": 0.2576, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.3121185701830864, |
| "grad_norm": 7.133167266845703, |
| "learning_rate": 9.392638315224829e-05, |
| "loss": 0.2167, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.3164777680906714, |
| "grad_norm": 7.530590057373047, |
| "learning_rate": 9.387361988313846e-05, |
| "loss": 0.2565, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.3208369659982564, |
| "grad_norm": 5.552865982055664, |
| "learning_rate": 9.38206433717527e-05, |
| "loss": 0.2203, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.3251961639058414, |
| "grad_norm": 3.770548105239868, |
| "learning_rate": 9.376745387557681e-05, |
| "loss": 0.2232, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.3295553618134264, |
| "grad_norm": 5.5245561599731445, |
| "learning_rate": 9.371405165313169e-05, |
| "loss": 0.2363, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.3339145597210114, |
| "grad_norm": 4.505107402801514, |
| "learning_rate": 9.366043696397222e-05, |
| "loss": 0.2091, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.3382737576285963, |
| "grad_norm": 4.21371603012085, |
| "learning_rate": 9.360661006868592e-05, |
| "loss": 0.2356, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.3426329555361813, |
| "grad_norm": 4.076999187469482, |
| "learning_rate": 9.355257122889173e-05, |
| "loss": 0.2194, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.3469921534437663, |
| "grad_norm": 2.4215261936187744, |
| "learning_rate": 9.349832070723871e-05, |
| "loss": 0.2031, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.3513513513513513, |
| "grad_norm": 10.00662612915039, |
| "learning_rate": 9.34438587674048e-05, |
| "loss": 0.2013, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.3557105492589363, |
| "grad_norm": 3.5849783420562744, |
| "learning_rate": 9.338918567409545e-05, |
| "loss": 0.1867, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.3600697471665213, |
| "grad_norm": 3.882270336151123, |
| "learning_rate": 9.333430169304247e-05, |
| "loss": 0.1831, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.3644289450741063, |
| "grad_norm": 1.877372145652771, |
| "learning_rate": 9.327920709100259e-05, |
| "loss": 0.1926, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.3687881429816913, |
| "grad_norm": 3.195490598678589, |
| "learning_rate": 9.322390213575631e-05, |
| "loss": 0.2008, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.3731473408892763, |
| "grad_norm": 3.353106737136841, |
| "learning_rate": 9.316838709610648e-05, |
| "loss": 0.2177, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.3775065387968612, |
| "grad_norm": 4.3637237548828125, |
| "learning_rate": 9.311266224187706e-05, |
| "loss": 0.2103, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.3818657367044465, |
| "grad_norm": 3.7159504890441895, |
| "learning_rate": 9.305672784391175e-05, |
| "loss": 0.2003, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.3862249346120314, |
| "grad_norm": 4.276161193847656, |
| "learning_rate": 9.300058417407276e-05, |
| "loss": 0.2439, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.3905841325196164, |
| "grad_norm": 4.765425682067871, |
| "learning_rate": 9.29442315052394e-05, |
| "loss": 0.2352, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.3949433304272014, |
| "grad_norm": 4.818525791168213, |
| "learning_rate": 9.288767011130684e-05, |
| "loss": 0.2355, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.3993025283347864, |
| "grad_norm": 3.578951597213745, |
| "learning_rate": 9.283090026718466e-05, |
| "loss": 0.2045, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.4036617262423714, |
| "grad_norm": 15.848716735839844, |
| "learning_rate": 9.277392224879568e-05, |
| "loss": 0.2175, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.4080209241499564, |
| "grad_norm": 9.305033683776855, |
| "learning_rate": 9.271673633307445e-05, |
| "loss": 0.237, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.4123801220575414, |
| "grad_norm": 5.726974010467529, |
| "learning_rate": 9.265934279796602e-05, |
| "loss": 0.243, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.4167393199651264, |
| "grad_norm": 5.116995334625244, |
| "learning_rate": 9.260174192242453e-05, |
| "loss": 0.2599, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.4210985178727114, |
| "grad_norm": 2.4488906860351562, |
| "learning_rate": 9.254393398641185e-05, |
| "loss": 0.23, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.4254577157802966, |
| "grad_norm": 5.382657051086426, |
| "learning_rate": 9.248591927089628e-05, |
| "loss": 0.2062, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.4298169136878816, |
| "grad_norm": 5.034473419189453, |
| "learning_rate": 9.242769805785115e-05, |
| "loss": 0.2262, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.4341761115954665, |
| "grad_norm": 3.196340322494507, |
| "learning_rate": 9.236927063025342e-05, |
| "loss": 0.179, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.4385353095030515, |
| "grad_norm": 6.5333452224731445, |
| "learning_rate": 9.231063727208234e-05, |
| "loss": 0.2302, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.4428945074106365, |
| "grad_norm": 2.912020206451416, |
| "learning_rate": 9.225179826831807e-05, |
| "loss": 0.1975, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.4472537053182215, |
| "grad_norm": 3.3908817768096924, |
| "learning_rate": 9.219275390494024e-05, |
| "loss": 0.1751, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.4516129032258065, |
| "grad_norm": 2.084850788116455, |
| "learning_rate": 9.213350446892668e-05, |
| "loss": 0.1586, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.4559721011333915, |
| "grad_norm": 2.0085716247558594, |
| "learning_rate": 9.207405024825186e-05, |
| "loss": 0.1521, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.4603312990409765, |
| "grad_norm": 4.547303676605225, |
| "learning_rate": 9.201439153188569e-05, |
| "loss": 0.1683, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.4646904969485615, |
| "grad_norm": 12.060445785522461, |
| "learning_rate": 9.19545286097919e-05, |
| "loss": 0.1724, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.4690496948561464, |
| "grad_norm": 2.533823013305664, |
| "learning_rate": 9.189446177292679e-05, |
| "loss": 0.1988, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.4734088927637314, |
| "grad_norm": 8.067084312438965, |
| "learning_rate": 9.183419131323778e-05, |
| "loss": 0.2145, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.4777680906713164, |
| "grad_norm": 5.309566974639893, |
| "learning_rate": 9.177371752366191e-05, |
| "loss": 0.1976, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.4821272885789014, |
| "grad_norm": 5.301851272583008, |
| "learning_rate": 9.171304069812454e-05, |
| "loss": 0.1951, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.4864864864864864, |
| "grad_norm": 3.730698347091675, |
| "learning_rate": 9.165216113153782e-05, |
| "loss": 0.2042, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.4908456843940714, |
| "grad_norm": 6.294808864593506, |
| "learning_rate": 9.159107911979936e-05, |
| "loss": 0.2248, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.4952048823016564, |
| "grad_norm": 4.979791164398193, |
| "learning_rate": 9.152979495979063e-05, |
| "loss": 0.1873, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.4995640802092414, |
| "grad_norm": 3.240687370300293, |
| "learning_rate": 9.146830894937571e-05, |
| "loss": 0.1832, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.5039232781168264, |
| "grad_norm": 1.4726358652114868, |
| "learning_rate": 9.140662138739969e-05, |
| "loss": 0.1798, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.5082824760244113, |
| "grad_norm": 3.543588161468506, |
| "learning_rate": 9.134473257368732e-05, |
| "loss": 0.1774, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.5126416739319966, |
| "grad_norm": 7.755619525909424, |
| "learning_rate": 9.128264280904145e-05, |
| "loss": 0.1684, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.5170008718395815, |
| "grad_norm": 2.366468667984009, |
| "learning_rate": 9.122035239524169e-05, |
| "loss": 0.1566, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.5213600697471665, |
| "grad_norm": 2.500969171524048, |
| "learning_rate": 9.115786163504285e-05, |
| "loss": 0.1442, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.5257192676547515, |
| "grad_norm": 3.558485984802246, |
| "learning_rate": 9.10951708321735e-05, |
| "loss": 0.1739, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.5300784655623365, |
| "grad_norm": 5.444303512573242, |
| "learning_rate": 9.10322802913345e-05, |
| "loss": 0.2408, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.5344376634699215, |
| "grad_norm": 5.714164733886719, |
| "learning_rate": 9.096919031819751e-05, |
| "loss": 0.1765, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.5387968613775065, |
| "grad_norm": 4.49392557144165, |
| "learning_rate": 9.090590121940348e-05, |
| "loss": 0.2224, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.5431560592850917, |
| "grad_norm": 3.7500405311584473, |
| "learning_rate": 9.084241330256121e-05, |
| "loss": 0.1803, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.5475152571926767, |
| "grad_norm": 5.087535858154297, |
| "learning_rate": 9.077872687624586e-05, |
| "loss": 0.209, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.5518744551002617, |
| "grad_norm": 2.011319875717163, |
| "learning_rate": 9.071484224999735e-05, |
| "loss": 0.1479, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.5562336530078467, |
| "grad_norm": 2.3110170364379883, |
| "learning_rate": 9.0650759734319e-05, |
| "loss": 0.1551, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.5605928509154317, |
| "grad_norm": 7.891526222229004, |
| "learning_rate": 9.05864796406759e-05, |
| "loss": 0.1787, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.5649520488230166, |
| "grad_norm": 1.9730972051620483, |
| "learning_rate": 9.052200228149343e-05, |
| "loss": 0.1774, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.5693112467306016, |
| "grad_norm": 2.8834328651428223, |
| "learning_rate": 9.04573279701558e-05, |
| "loss": 0.1431, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.5736704446381866, |
| "grad_norm": 1.477083444595337, |
| "learning_rate": 9.039245702100448e-05, |
| "loss": 0.1366, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.5780296425457716, |
| "grad_norm": 3.89342999458313, |
| "learning_rate": 9.032738974933664e-05, |
| "loss": 0.1327, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.5823888404533566, |
| "grad_norm": 7.806596279144287, |
| "learning_rate": 9.026212647140365e-05, |
| "loss": 0.1546, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.5867480383609416, |
| "grad_norm": 9.280295372009277, |
| "learning_rate": 9.019666750440956e-05, |
| "loss": 0.1438, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.5911072362685266, |
| "grad_norm": 4.350841999053955, |
| "learning_rate": 9.013101316650956e-05, |
| "loss": 0.1765, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.5954664341761116, |
| "grad_norm": 1.526946783065796, |
| "learning_rate": 9.00651637768084e-05, |
| "loss": 0.1389, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.5998256320836965, |
| "grad_norm": 1.9134260416030884, |
| "learning_rate": 8.999911965535885e-05, |
| "loss": 0.1341, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.6041848299912815, |
| "grad_norm": 7.601817607879639, |
| "learning_rate": 8.993288112316014e-05, |
| "loss": 0.1553, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.6085440278988665, |
| "grad_norm": 1.6179394721984863, |
| "learning_rate": 8.986644850215644e-05, |
| "loss": 0.1662, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.6129032258064515, |
| "grad_norm": 3.03326678276062, |
| "learning_rate": 8.979982211523523e-05, |
| "loss": 0.1536, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.6172624237140365, |
| "grad_norm": 1.7008064985275269, |
| "learning_rate": 8.97330022862258e-05, |
| "loss": 0.1675, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.6216216216216215, |
| "grad_norm": 5.134827136993408, |
| "learning_rate": 8.96659893398976e-05, |
| "loss": 0.1658, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.6259808195292065, |
| "grad_norm": 3.6326844692230225, |
| "learning_rate": 8.959878360195876e-05, |
| "loss": 0.1819, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.6303400174367915, |
| "grad_norm": 2.703415632247925, |
| "learning_rate": 8.953138539905438e-05, |
| "loss": 0.1482, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.6346992153443767, |
| "grad_norm": 3.979405164718628, |
| "learning_rate": 8.946379505876506e-05, |
| "loss": 0.1582, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.6390584132519617, |
| "grad_norm": 3.959192991256714, |
| "learning_rate": 8.939601290960527e-05, |
| "loss": 0.1273, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.6434176111595467, |
| "grad_norm": 3.0278549194335938, |
| "learning_rate": 8.932803928102167e-05, |
| "loss": 0.1596, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.6477768090671316, |
| "grad_norm": 2.5706779956817627, |
| "learning_rate": 8.925987450339168e-05, |
| "loss": 0.1411, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.6521360069747166, |
| "grad_norm": 3.269700765609741, |
| "learning_rate": 8.919151890802172e-05, |
| "loss": 0.1273, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.6564952048823016, |
| "grad_norm": 2.8354709148406982, |
| "learning_rate": 8.912297282714564e-05, |
| "loss": 0.1608, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.6608544027898866, |
| "grad_norm": 4.5689215660095215, |
| "learning_rate": 8.905423659392316e-05, |
| "loss": 0.1328, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.6652136006974718, |
| "grad_norm": 0.7547925710678101, |
| "learning_rate": 8.898531054243822e-05, |
| "loss": 0.1371, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.6695727986050568, |
| "grad_norm": 4.1064534187316895, |
| "learning_rate": 8.891619500769729e-05, |
| "loss": 0.1616, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.6739319965126418, |
| "grad_norm": 2.2212579250335693, |
| "learning_rate": 8.884689032562785e-05, |
| "loss": 0.1283, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.6782911944202268, |
| "grad_norm": 0.8942421674728394, |
| "learning_rate": 8.87773968330767e-05, |
| "loss": 0.1357, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.6826503923278118, |
| "grad_norm": 4.484212875366211, |
| "learning_rate": 8.870771486780832e-05, |
| "loss": 0.14, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.6870095902353968, |
| "grad_norm": 4.993788719177246, |
| "learning_rate": 8.863784476850322e-05, |
| "loss": 0.1519, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.6913687881429817, |
| "grad_norm": 4.12445592880249, |
| "learning_rate": 8.856778687475635e-05, |
| "loss": 0.129, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.6957279860505667, |
| "grad_norm": 1.0877776145935059, |
| "learning_rate": 8.849754152707541e-05, |
| "loss": 0.1312, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.7000871839581517, |
| "grad_norm": 1.0505248308181763, |
| "learning_rate": 8.842710906687916e-05, |
| "loss": 0.1313, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.7044463818657367, |
| "grad_norm": 1.6745731830596924, |
| "learning_rate": 8.83564898364958e-05, |
| "loss": 0.1265, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.7088055797733217, |
| "grad_norm": 1.7037270069122314, |
| "learning_rate": 8.828568417916136e-05, |
| "loss": 0.1428, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.7131647776809067, |
| "grad_norm": 1.1031413078308105, |
| "learning_rate": 8.821469243901794e-05, |
| "loss": 0.1301, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.7175239755884917, |
| "grad_norm": 5.505949974060059, |
| "learning_rate": 8.814351496111201e-05, |
| "loss": 0.1263, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.7218831734960767, |
| "grad_norm": 11.303099632263184, |
| "learning_rate": 8.807215209139293e-05, |
| "loss": 0.1418, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.7262423714036617, |
| "grad_norm": 1.550581932067871, |
| "learning_rate": 8.8000604176711e-05, |
| "loss": 0.1367, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.7306015693112466, |
| "grad_norm": 1.0917490720748901, |
| "learning_rate": 8.792887156481598e-05, |
| "loss": 0.134, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.7349607672188316, |
| "grad_norm": 4.534971237182617, |
| "learning_rate": 8.785695460435534e-05, |
| "loss": 0.1088, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.7393199651264166, |
| "grad_norm": 3.131039619445801, |
| "learning_rate": 8.778485364487248e-05, |
| "loss": 0.1267, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.7436791630340016, |
| "grad_norm": 2.5974009037017822, |
| "learning_rate": 8.771256903680519e-05, |
| "loss": 0.1223, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.7480383609415866, |
| "grad_norm": 2.29083251953125, |
| "learning_rate": 8.764010113148382e-05, |
| "loss": 0.175, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.7523975588491716, |
| "grad_norm": 5.5496344566345215, |
| "learning_rate": 8.756745028112959e-05, |
| "loss": 0.1417, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.7567567567567568, |
| "grad_norm": 1.8061878681182861, |
| "learning_rate": 8.749461683885296e-05, |
| "loss": 0.1137, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.7611159546643418, |
| "grad_norm": 2.0431365966796875, |
| "learning_rate": 8.742160115865179e-05, |
| "loss": 0.1357, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.7654751525719268, |
| "grad_norm": 17.856874465942383, |
| "learning_rate": 8.734840359540974e-05, |
| "loss": 0.1235, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.7698343504795118, |
| "grad_norm": 1.4601013660430908, |
| "learning_rate": 8.727502450489446e-05, |
| "loss": 0.1515, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.7741935483870968, |
| "grad_norm": 1.304296612739563, |
| "learning_rate": 8.720146424375591e-05, |
| "loss": 0.1416, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.7785527462946817, |
| "grad_norm": 3.461920738220215, |
| "learning_rate": 8.712772316952458e-05, |
| "loss": 0.1428, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.7829119442022667, |
| "grad_norm": 1.8705286979675293, |
| "learning_rate": 8.705380164060982e-05, |
| "loss": 0.1494, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.787271142109852, |
| "grad_norm": 1.7368310689926147, |
| "learning_rate": 8.697970001629799e-05, |
| "loss": 0.1351, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.791630340017437, |
| "grad_norm": 10.622164726257324, |
| "learning_rate": 8.690541865675084e-05, |
| "loss": 0.1249, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.795989537925022, |
| "grad_norm": 1.60009765625, |
| "learning_rate": 8.68309579230037e-05, |
| "loss": 0.1539, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.800348735832607, |
| "grad_norm": 5.96570348739624, |
| "learning_rate": 8.675631817696372e-05, |
| "loss": 0.1325, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.804707933740192, |
| "grad_norm": 1.100277066230774, |
| "learning_rate": 8.668149978140808e-05, |
| "loss": 0.1531, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.8090671316477769, |
| "grad_norm": 3.805157423019409, |
| "learning_rate": 8.66065030999823e-05, |
| "loss": 0.1369, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.8134263295553619, |
| "grad_norm": 1.2350760698318481, |
| "learning_rate": 8.653132849719845e-05, |
| "loss": 0.1633, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.8177855274629469, |
| "grad_norm": 1.554290771484375, |
| "learning_rate": 8.64559763384333e-05, |
| "loss": 0.1456, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.8221447253705318, |
| "grad_norm": 4.020148754119873, |
| "learning_rate": 8.638044698992669e-05, |
| "loss": 0.1351, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.8265039232781168, |
| "grad_norm": 2.593574047088623, |
| "learning_rate": 8.630474081877959e-05, |
| "loss": 0.123, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.8308631211857018, |
| "grad_norm": 2.704629421234131, |
| "learning_rate": 8.62288581929525e-05, |
| "loss": 0.1513, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.8352223190932868, |
| "grad_norm": 8.686463356018066, |
| "learning_rate": 8.615279948126343e-05, |
| "loss": 0.1441, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.8395815170008718, |
| "grad_norm": 0.7825227975845337, |
| "learning_rate": 8.60765650533863e-05, |
| "loss": 0.1232, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.8439407149084568, |
| "grad_norm": 4.397798538208008, |
| "learning_rate": 8.60001552798491e-05, |
| "loss": 0.1438, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.8482999128160418, |
| "grad_norm": 1.311542272567749, |
| "learning_rate": 8.592357053203202e-05, |
| "loss": 0.1373, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.8526591107236268, |
| "grad_norm": 5.1354498863220215, |
| "learning_rate": 8.58468111821657e-05, |
| "loss": 0.1423, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.8570183086312118, |
| "grad_norm": 1.4782944917678833, |
| "learning_rate": 8.576987760332943e-05, |
| "loss": 0.1389, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.8613775065387967, |
| "grad_norm": 6.529745578765869, |
| "learning_rate": 8.56927701694493e-05, |
| "loss": 0.1546, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.8657367044463817, |
| "grad_norm": 1.609787940979004, |
| "learning_rate": 8.561548925529643e-05, |
| "loss": 0.137, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.8700959023539667, |
| "grad_norm": 0.907353401184082, |
| "learning_rate": 8.553803523648506e-05, |
| "loss": 0.1189, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.8744551002615517, |
| "grad_norm": 1.036514163017273, |
| "learning_rate": 8.546040848947086e-05, |
| "loss": 0.1202, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.878814298169137, |
| "grad_norm": 1.3986374139785767, |
| "learning_rate": 8.538260939154894e-05, |
| "loss": 0.1271, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.883173496076722, |
| "grad_norm": 1.7852044105529785, |
| "learning_rate": 8.530463832085218e-05, |
| "loss": 0.1398, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.887532693984307, |
| "grad_norm": 0.8431325554847717, |
| "learning_rate": 8.522649565634927e-05, |
| "loss": 0.1065, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.8918918918918919, |
| "grad_norm": 2.7174603939056396, |
| "learning_rate": 8.51481817778429e-05, |
| "loss": 0.1086, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.8962510897994769, |
| "grad_norm": 4.746669769287109, |
| "learning_rate": 8.506969706596797e-05, |
| "loss": 0.1113, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.9006102877070619, |
| "grad_norm": 4.1237030029296875, |
| "learning_rate": 8.499104190218964e-05, |
| "loss": 0.1104, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.9049694856146469, |
| "grad_norm": 1.4972171783447266, |
| "learning_rate": 8.49122166688016e-05, |
| "loss": 0.1305, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.909328683522232, |
| "grad_norm": 2.446460247039795, |
| "learning_rate": 8.483322174892404e-05, |
| "loss": 0.1177, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.913687881429817, |
| "grad_norm": 7.465681552886963, |
| "learning_rate": 8.475405752650199e-05, |
| "loss": 0.1059, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.918047079337402, |
| "grad_norm": 1.2010679244995117, |
| "learning_rate": 8.467472438630328e-05, |
| "loss": 0.1323, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.922406277244987, |
| "grad_norm": 3.103692054748535, |
| "learning_rate": 8.459522271391682e-05, |
| "loss": 0.1379, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.926765475152572, |
| "grad_norm": 5.2912211418151855, |
| "learning_rate": 8.451555289575057e-05, |
| "loss": 0.1461, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.931124673060157, |
| "grad_norm": 4.1434407234191895, |
| "learning_rate": 8.443571531902981e-05, |
| "loss": 0.1373, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.935483870967742, |
| "grad_norm": 4.254314422607422, |
| "learning_rate": 8.435571037179512e-05, |
| "loss": 0.1266, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.939843068875327, |
| "grad_norm": 5.463832378387451, |
| "learning_rate": 8.427553844290062e-05, |
| "loss": 0.1534, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.944202266782912, |
| "grad_norm": 6.808103084564209, |
| "learning_rate": 8.419519992201201e-05, |
| "loss": 0.1324, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.948561464690497, |
| "grad_norm": 2.2606234550476074, |
| "learning_rate": 8.411469519960469e-05, |
| "loss": 0.1246, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.952920662598082, |
| "grad_norm": 7.445507526397705, |
| "learning_rate": 8.403402466696182e-05, |
| "loss": 0.125, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.957279860505667, |
| "grad_norm": 1.0063568353652954, |
| "learning_rate": 8.395318871617255e-05, |
| "loss": 0.1373, |
| "step": 4490 |
| }, |
| { |
| "epoch": 1.961639058413252, |
| "grad_norm": 1.5997445583343506, |
| "learning_rate": 8.387218774012992e-05, |
| "loss": 0.1191, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.965998256320837, |
| "grad_norm": 1.0268845558166504, |
| "learning_rate": 8.379102213252915e-05, |
| "loss": 0.1221, |
| "step": 4510 |
| }, |
| { |
| "epoch": 1.970357454228422, |
| "grad_norm": 1.748948574066162, |
| "learning_rate": 8.370969228786556e-05, |
| "loss": 0.1186, |
| "step": 4520 |
| }, |
| { |
| "epoch": 1.974716652136007, |
| "grad_norm": 6.44364595413208, |
| "learning_rate": 8.362819860143275e-05, |
| "loss": 0.1157, |
| "step": 4530 |
| }, |
| { |
| "epoch": 1.9790758500435919, |
| "grad_norm": 3.785693883895874, |
| "learning_rate": 8.354654146932066e-05, |
| "loss": 0.1319, |
| "step": 4540 |
| }, |
| { |
| "epoch": 1.9834350479511769, |
| "grad_norm": 2.268069267272949, |
| "learning_rate": 8.346472128841364e-05, |
| "loss": 0.1048, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.9877942458587619, |
| "grad_norm": 1.350669264793396, |
| "learning_rate": 8.338273845638848e-05, |
| "loss": 0.0982, |
| "step": 4560 |
| }, |
| { |
| "epoch": 1.9921534437663468, |
| "grad_norm": 1.2077120542526245, |
| "learning_rate": 8.330059337171258e-05, |
| "loss": 0.116, |
| "step": 4570 |
| }, |
| { |
| "epoch": 1.9965126416739318, |
| "grad_norm": 0.8750612735748291, |
| "learning_rate": 8.32182864336419e-05, |
| "loss": 0.1206, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.000871839581517, |
| "grad_norm": 2.0030882358551025, |
| "learning_rate": 8.313581804221908e-05, |
| "loss": 0.1223, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.005231037489102, |
| "grad_norm": 2.0037591457366943, |
| "learning_rate": 8.305318859827147e-05, |
| "loss": 0.1079, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.009590235396687, |
| "grad_norm": 1.143376111984253, |
| "learning_rate": 8.297039850340923e-05, |
| "loss": 0.125, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.0139494333042722, |
| "grad_norm": 1.0574803352355957, |
| "learning_rate": 8.288744816002331e-05, |
| "loss": 0.0963, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.018308631211857, |
| "grad_norm": 0.8436086177825928, |
| "learning_rate": 8.280433797128357e-05, |
| "loss": 0.1012, |
| "step": 4630 |
| }, |
| { |
| "epoch": 2.022667829119442, |
| "grad_norm": 7.240283012390137, |
| "learning_rate": 8.272106834113674e-05, |
| "loss": 0.1186, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.027027027027027, |
| "grad_norm": 0.9012176394462585, |
| "learning_rate": 8.26376396743045e-05, |
| "loss": 0.1106, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.031386224934612, |
| "grad_norm": 1.180629849433899, |
| "learning_rate": 8.25540523762815e-05, |
| "loss": 0.1064, |
| "step": 4660 |
| }, |
| { |
| "epoch": 2.035745422842197, |
| "grad_norm": 1.1704062223434448, |
| "learning_rate": 8.247030685333346e-05, |
| "loss": 0.1247, |
| "step": 4670 |
| }, |
| { |
| "epoch": 2.040104620749782, |
| "grad_norm": 2.9050512313842773, |
| "learning_rate": 8.238640351249503e-05, |
| "loss": 0.1024, |
| "step": 4680 |
| }, |
| { |
| "epoch": 2.044463818657367, |
| "grad_norm": 1.1793980598449707, |
| "learning_rate": 8.2302342761568e-05, |
| "loss": 0.1375, |
| "step": 4690 |
| }, |
| { |
| "epoch": 2.048823016564952, |
| "grad_norm": 0.8722379803657532, |
| "learning_rate": 8.221812500911919e-05, |
| "loss": 0.0941, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.053182214472537, |
| "grad_norm": 2.5320537090301514, |
| "learning_rate": 8.213375066447853e-05, |
| "loss": 0.1049, |
| "step": 4710 |
| }, |
| { |
| "epoch": 2.057541412380122, |
| "grad_norm": 1.0855395793914795, |
| "learning_rate": 8.204922013773702e-05, |
| "loss": 0.1126, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.061900610287707, |
| "grad_norm": 7.323856830596924, |
| "learning_rate": 8.196453383974478e-05, |
| "loss": 0.1281, |
| "step": 4730 |
| }, |
| { |
| "epoch": 2.066259808195292, |
| "grad_norm": 5.2091474533081055, |
| "learning_rate": 8.187969218210904e-05, |
| "loss": 0.1096, |
| "step": 4740 |
| }, |
| { |
| "epoch": 2.070619006102877, |
| "grad_norm": 1.0623677968978882, |
| "learning_rate": 8.179469557719213e-05, |
| "loss": 0.0995, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.074978204010462, |
| "grad_norm": 4.782459259033203, |
| "learning_rate": 8.170954443810948e-05, |
| "loss": 0.1205, |
| "step": 4760 |
| }, |
| { |
| "epoch": 2.079337401918047, |
| "grad_norm": 0.9393866658210754, |
| "learning_rate": 8.162423917872764e-05, |
| "loss": 0.1196, |
| "step": 4770 |
| }, |
| { |
| "epoch": 2.083696599825632, |
| "grad_norm": 6.70692253112793, |
| "learning_rate": 8.153878021366217e-05, |
| "loss": 0.1242, |
| "step": 4780 |
| }, |
| { |
| "epoch": 2.088055797733217, |
| "grad_norm": 9.28081226348877, |
| "learning_rate": 8.14531679582758e-05, |
| "loss": 0.1081, |
| "step": 4790 |
| }, |
| { |
| "epoch": 2.092414995640802, |
| "grad_norm": 0.7705070972442627, |
| "learning_rate": 8.136740282867621e-05, |
| "loss": 0.1239, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.096774193548387, |
| "grad_norm": 11.719225883483887, |
| "learning_rate": 8.128148524171418e-05, |
| "loss": 0.1137, |
| "step": 4810 |
| }, |
| { |
| "epoch": 2.101133391455972, |
| "grad_norm": 0.9089305996894836, |
| "learning_rate": 8.119541561498146e-05, |
| "loss": 0.1466, |
| "step": 4820 |
| }, |
| { |
| "epoch": 2.105492589363557, |
| "grad_norm": 0.7215758562088013, |
| "learning_rate": 8.110919436680877e-05, |
| "loss": 0.0967, |
| "step": 4830 |
| }, |
| { |
| "epoch": 2.109851787271142, |
| "grad_norm": 1.2035927772521973, |
| "learning_rate": 8.102282191626378e-05, |
| "loss": 0.1128, |
| "step": 4840 |
| }, |
| { |
| "epoch": 2.114210985178727, |
| "grad_norm": 2.1401278972625732, |
| "learning_rate": 8.0936298683149e-05, |
| "loss": 0.097, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.118570183086312, |
| "grad_norm": 0.7536699771881104, |
| "learning_rate": 8.084962508799991e-05, |
| "loss": 0.1036, |
| "step": 4860 |
| }, |
| { |
| "epoch": 2.122929380993897, |
| "grad_norm": 0.9746949672698975, |
| "learning_rate": 8.076280155208273e-05, |
| "loss": 0.1012, |
| "step": 4870 |
| }, |
| { |
| "epoch": 2.127288578901482, |
| "grad_norm": 2.9108328819274902, |
| "learning_rate": 8.067582849739245e-05, |
| "loss": 0.0931, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.131647776809067, |
| "grad_norm": 2.736168622970581, |
| "learning_rate": 8.058870634665079e-05, |
| "loss": 0.136, |
| "step": 4890 |
| }, |
| { |
| "epoch": 2.1360069747166524, |
| "grad_norm": 9.78114128112793, |
| "learning_rate": 8.050143552330414e-05, |
| "loss": 0.0862, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.1403661726242373, |
| "grad_norm": 0.6983315944671631, |
| "learning_rate": 8.041401645152151e-05, |
| "loss": 0.0964, |
| "step": 4910 |
| }, |
| { |
| "epoch": 2.1447253705318223, |
| "grad_norm": 1.677988886833191, |
| "learning_rate": 8.032644955619239e-05, |
| "loss": 0.1192, |
| "step": 4920 |
| }, |
| { |
| "epoch": 2.1490845684394073, |
| "grad_norm": 5.114157199859619, |
| "learning_rate": 8.023873526292483e-05, |
| "loss": 0.1027, |
| "step": 4930 |
| }, |
| { |
| "epoch": 2.1534437663469923, |
| "grad_norm": 1.319881796836853, |
| "learning_rate": 8.015087399804322e-05, |
| "loss": 0.082, |
| "step": 4940 |
| }, |
| { |
| "epoch": 2.1578029642545773, |
| "grad_norm": 1.808841347694397, |
| "learning_rate": 8.006286618858635e-05, |
| "loss": 0.1118, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.1621621621621623, |
| "grad_norm": 3.633739709854126, |
| "learning_rate": 7.99747122623052e-05, |
| "loss": 0.1027, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.1665213600697473, |
| "grad_norm": 0.9861094951629639, |
| "learning_rate": 7.988641264766097e-05, |
| "loss": 0.1185, |
| "step": 4970 |
| }, |
| { |
| "epoch": 2.1708805579773323, |
| "grad_norm": 1.4471122026443481, |
| "learning_rate": 7.9797967773823e-05, |
| "loss": 0.0963, |
| "step": 4980 |
| }, |
| { |
| "epoch": 2.1752397558849172, |
| "grad_norm": 1.0878154039382935, |
| "learning_rate": 7.970937807066659e-05, |
| "loss": 0.1053, |
| "step": 4990 |
| }, |
| { |
| "epoch": 2.1795989537925022, |
| "grad_norm": 1.0093618631362915, |
| "learning_rate": 7.962064396877098e-05, |
| "loss": 0.1105, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.1839581517000872, |
| "grad_norm": 0.8233932852745056, |
| "learning_rate": 7.953176589941722e-05, |
| "loss": 0.104, |
| "step": 5010 |
| }, |
| { |
| "epoch": 2.188317349607672, |
| "grad_norm": 0.7308264970779419, |
| "learning_rate": 7.944274429458614e-05, |
| "loss": 0.1007, |
| "step": 5020 |
| }, |
| { |
| "epoch": 2.192676547515257, |
| "grad_norm": 0.8960587382316589, |
| "learning_rate": 7.93535795869562e-05, |
| "loss": 0.1017, |
| "step": 5030 |
| }, |
| { |
| "epoch": 2.197035745422842, |
| "grad_norm": 1.306472659111023, |
| "learning_rate": 7.926427220990134e-05, |
| "loss": 0.0971, |
| "step": 5040 |
| }, |
| { |
| "epoch": 2.201394943330427, |
| "grad_norm": 1.0485479831695557, |
| "learning_rate": 7.9174822597489e-05, |
| "loss": 0.1033, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.205754141238012, |
| "grad_norm": 4.241369724273682, |
| "learning_rate": 7.908523118447789e-05, |
| "loss": 0.0802, |
| "step": 5060 |
| }, |
| { |
| "epoch": 2.210113339145597, |
| "grad_norm": 1.2369358539581299, |
| "learning_rate": 7.89954984063159e-05, |
| "loss": 0.1125, |
| "step": 5070 |
| }, |
| { |
| "epoch": 2.214472537053182, |
| "grad_norm": 0.5996437072753906, |
| "learning_rate": 7.890562469913811e-05, |
| "loss": 0.1047, |
| "step": 5080 |
| }, |
| { |
| "epoch": 2.218831734960767, |
| "grad_norm": 0.778715968132019, |
| "learning_rate": 7.881561049976447e-05, |
| "loss": 0.0911, |
| "step": 5090 |
| }, |
| { |
| "epoch": 2.223190932868352, |
| "grad_norm": 18.25193977355957, |
| "learning_rate": 7.872545624569779e-05, |
| "loss": 0.1072, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.227550130775937, |
| "grad_norm": 0.5723896622657776, |
| "learning_rate": 7.863516237512164e-05, |
| "loss": 0.1119, |
| "step": 5110 |
| }, |
| { |
| "epoch": 2.231909328683522, |
| "grad_norm": 1.7326332330703735, |
| "learning_rate": 7.854472932689815e-05, |
| "loss": 0.1062, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.236268526591107, |
| "grad_norm": 7.85261869430542, |
| "learning_rate": 7.845415754056591e-05, |
| "loss": 0.1157, |
| "step": 5130 |
| }, |
| { |
| "epoch": 2.240627724498692, |
| "grad_norm": 0.9551669359207153, |
| "learning_rate": 7.836344745633783e-05, |
| "loss": 0.0999, |
| "step": 5140 |
| }, |
| { |
| "epoch": 2.244986922406277, |
| "grad_norm": 0.7430893778800964, |
| "learning_rate": 7.8272599515099e-05, |
| "loss": 0.096, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.249346120313862, |
| "grad_norm": 6.560830116271973, |
| "learning_rate": 7.818161415840453e-05, |
| "loss": 0.1066, |
| "step": 5160 |
| }, |
| { |
| "epoch": 2.2537053182214475, |
| "grad_norm": 5.854716777801514, |
| "learning_rate": 7.809049182847745e-05, |
| "loss": 0.0974, |
| "step": 5170 |
| }, |
| { |
| "epoch": 2.258064516129032, |
| "grad_norm": 4.683152675628662, |
| "learning_rate": 7.799923296820653e-05, |
| "loss": 0.0949, |
| "step": 5180 |
| }, |
| { |
| "epoch": 2.2624237140366175, |
| "grad_norm": 0.6916239261627197, |
| "learning_rate": 7.790783802114408e-05, |
| "loss": 0.1007, |
| "step": 5190 |
| }, |
| { |
| "epoch": 2.2667829119442024, |
| "grad_norm": 0.8912094235420227, |
| "learning_rate": 7.781630743150392e-05, |
| "loss": 0.0906, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.2711421098517874, |
| "grad_norm": 4.473104953765869, |
| "learning_rate": 7.772464164415907e-05, |
| "loss": 0.0984, |
| "step": 5210 |
| }, |
| { |
| "epoch": 2.2755013077593724, |
| "grad_norm": 0.9665694832801819, |
| "learning_rate": 7.763284110463973e-05, |
| "loss": 0.1049, |
| "step": 5220 |
| }, |
| { |
| "epoch": 2.2798605056669574, |
| "grad_norm": 9.33495807647705, |
| "learning_rate": 7.754090625913099e-05, |
| "loss": 0.0989, |
| "step": 5230 |
| }, |
| { |
| "epoch": 2.2842197035745424, |
| "grad_norm": 7.102493762969971, |
| "learning_rate": 7.744883755447075e-05, |
| "loss": 0.0991, |
| "step": 5240 |
| }, |
| { |
| "epoch": 2.2885789014821274, |
| "grad_norm": 3.3411788940429688, |
| "learning_rate": 7.735663543814749e-05, |
| "loss": 0.1208, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.2929380993897124, |
| "grad_norm": 0.6373879909515381, |
| "learning_rate": 7.726430035829813e-05, |
| "loss": 0.0938, |
| "step": 5260 |
| }, |
| { |
| "epoch": 2.2972972972972974, |
| "grad_norm": 2.2355239391326904, |
| "learning_rate": 7.717183276370586e-05, |
| "loss": 0.0954, |
| "step": 5270 |
| }, |
| { |
| "epoch": 2.3016564952048824, |
| "grad_norm": 0.8228651285171509, |
| "learning_rate": 7.707923310379794e-05, |
| "loss": 0.0911, |
| "step": 5280 |
| }, |
| { |
| "epoch": 2.3060156931124673, |
| "grad_norm": 1.4001113176345825, |
| "learning_rate": 7.698650182864351e-05, |
| "loss": 0.101, |
| "step": 5290 |
| }, |
| { |
| "epoch": 2.3103748910200523, |
| "grad_norm": 8.409546852111816, |
| "learning_rate": 7.689363938895138e-05, |
| "loss": 0.1008, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.3147340889276373, |
| "grad_norm": 3.2548961639404297, |
| "learning_rate": 7.680064623606791e-05, |
| "loss": 0.0809, |
| "step": 5310 |
| }, |
| { |
| "epoch": 2.3190932868352223, |
| "grad_norm": 0.7357600331306458, |
| "learning_rate": 7.670752282197476e-05, |
| "loss": 0.0834, |
| "step": 5320 |
| }, |
| { |
| "epoch": 2.3234524847428073, |
| "grad_norm": 0.5668644905090332, |
| "learning_rate": 7.66142695992867e-05, |
| "loss": 0.0998, |
| "step": 5330 |
| }, |
| { |
| "epoch": 2.3278116826503923, |
| "grad_norm": 0.7869893908500671, |
| "learning_rate": 7.652088702124944e-05, |
| "loss": 0.0934, |
| "step": 5340 |
| }, |
| { |
| "epoch": 2.3321708805579773, |
| "grad_norm": 1.659505009651184, |
| "learning_rate": 7.64273755417374e-05, |
| "loss": 0.0888, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.3365300784655623, |
| "grad_norm": 0.6730926036834717, |
| "learning_rate": 7.633373561525148e-05, |
| "loss": 0.0892, |
| "step": 5360 |
| }, |
| { |
| "epoch": 2.3408892763731473, |
| "grad_norm": 5.145294666290283, |
| "learning_rate": 7.623996769691691e-05, |
| "loss": 0.103, |
| "step": 5370 |
| }, |
| { |
| "epoch": 2.3452484742807322, |
| "grad_norm": 7.745975017547607, |
| "learning_rate": 7.614607224248103e-05, |
| "loss": 0.0946, |
| "step": 5380 |
| }, |
| { |
| "epoch": 2.3496076721883172, |
| "grad_norm": 1.3489940166473389, |
| "learning_rate": 7.605204970831096e-05, |
| "loss": 0.0831, |
| "step": 5390 |
| }, |
| { |
| "epoch": 2.353966870095902, |
| "grad_norm": 1.2738442420959473, |
| "learning_rate": 7.595790055139163e-05, |
| "loss": 0.1029, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.358326068003487, |
| "grad_norm": 0.987291693687439, |
| "learning_rate": 7.586362522932323e-05, |
| "loss": 0.109, |
| "step": 5410 |
| }, |
| { |
| "epoch": 2.362685265911072, |
| "grad_norm": 6.236382961273193, |
| "learning_rate": 7.576922420031929e-05, |
| "loss": 0.1027, |
| "step": 5420 |
| }, |
| { |
| "epoch": 2.367044463818657, |
| "grad_norm": 0.9077024459838867, |
| "learning_rate": 7.567469792320428e-05, |
| "loss": 0.1084, |
| "step": 5430 |
| }, |
| { |
| "epoch": 2.3714036617262426, |
| "grad_norm": 0.6678237915039062, |
| "learning_rate": 7.558004685741137e-05, |
| "loss": 0.0982, |
| "step": 5440 |
| }, |
| { |
| "epoch": 2.375762859633827, |
| "grad_norm": 0.6273680925369263, |
| "learning_rate": 7.548527146298036e-05, |
| "loss": 0.0933, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.3801220575414126, |
| "grad_norm": 0.6411100625991821, |
| "learning_rate": 7.539037220055527e-05, |
| "loss": 0.0914, |
| "step": 5460 |
| }, |
| { |
| "epoch": 2.384481255448997, |
| "grad_norm": 0.9402559995651245, |
| "learning_rate": 7.529534953138213e-05, |
| "loss": 0.0778, |
| "step": 5470 |
| }, |
| { |
| "epoch": 2.3888404533565826, |
| "grad_norm": 0.9908243417739868, |
| "learning_rate": 7.520020391730684e-05, |
| "loss": 0.0866, |
| "step": 5480 |
| }, |
| { |
| "epoch": 2.3931996512641676, |
| "grad_norm": 1.700788140296936, |
| "learning_rate": 7.510493582077281e-05, |
| "loss": 0.098, |
| "step": 5490 |
| }, |
| { |
| "epoch": 2.3975588491717525, |
| "grad_norm": 6.728827476501465, |
| "learning_rate": 7.500954570481882e-05, |
| "loss": 0.1064, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.4019180470793375, |
| "grad_norm": 4.217410564422607, |
| "learning_rate": 7.491403403307662e-05, |
| "loss": 0.1054, |
| "step": 5510 |
| }, |
| { |
| "epoch": 2.4062772449869225, |
| "grad_norm": 7.545099258422852, |
| "learning_rate": 7.481840126976885e-05, |
| "loss": 0.0976, |
| "step": 5520 |
| }, |
| { |
| "epoch": 2.4106364428945075, |
| "grad_norm": 6.637195110321045, |
| "learning_rate": 7.472264787970666e-05, |
| "loss": 0.1006, |
| "step": 5530 |
| }, |
| { |
| "epoch": 2.4149956408020925, |
| "grad_norm": 0.878823459148407, |
| "learning_rate": 7.462677432828751e-05, |
| "loss": 0.084, |
| "step": 5540 |
| }, |
| { |
| "epoch": 2.4193548387096775, |
| "grad_norm": 1.0781915187835693, |
| "learning_rate": 7.453078108149287e-05, |
| "loss": 0.0995, |
| "step": 5550 |
| }, |
| { |
| "epoch": 2.4237140366172625, |
| "grad_norm": 0.7587170004844666, |
| "learning_rate": 7.443466860588599e-05, |
| "loss": 0.0971, |
| "step": 5560 |
| }, |
| { |
| "epoch": 2.4280732345248475, |
| "grad_norm": 1.204644799232483, |
| "learning_rate": 7.43384373686096e-05, |
| "loss": 0.0864, |
| "step": 5570 |
| }, |
| { |
| "epoch": 2.4324324324324325, |
| "grad_norm": 1.538623571395874, |
| "learning_rate": 7.424208783738367e-05, |
| "loss": 0.104, |
| "step": 5580 |
| }, |
| { |
| "epoch": 2.4367916303400174, |
| "grad_norm": 1.163440465927124, |
| "learning_rate": 7.414562048050315e-05, |
| "loss": 0.0782, |
| "step": 5590 |
| }, |
| { |
| "epoch": 2.4411508282476024, |
| "grad_norm": 1.4269083738327026, |
| "learning_rate": 7.404903576683559e-05, |
| "loss": 0.0973, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.4455100261551874, |
| "grad_norm": 2.2883760929107666, |
| "learning_rate": 7.3952334165819e-05, |
| "loss": 0.0805, |
| "step": 5610 |
| }, |
| { |
| "epoch": 2.4498692240627724, |
| "grad_norm": 0.9393578767776489, |
| "learning_rate": 7.385551614745952e-05, |
| "loss": 0.0712, |
| "step": 5620 |
| }, |
| { |
| "epoch": 2.4542284219703574, |
| "grad_norm": 7.377894401550293, |
| "learning_rate": 7.375858218232905e-05, |
| "loss": 0.0733, |
| "step": 5630 |
| }, |
| { |
| "epoch": 2.4585876198779424, |
| "grad_norm": 0.8878276944160461, |
| "learning_rate": 7.366153274156312e-05, |
| "loss": 0.0836, |
| "step": 5640 |
| }, |
| { |
| "epoch": 2.4629468177855274, |
| "grad_norm": 0.679876983165741, |
| "learning_rate": 7.356436829685844e-05, |
| "loss": 0.0877, |
| "step": 5650 |
| }, |
| { |
| "epoch": 2.4673060156931124, |
| "grad_norm": 0.6781834363937378, |
| "learning_rate": 7.346708932047074e-05, |
| "loss": 0.0897, |
| "step": 5660 |
| }, |
| { |
| "epoch": 2.4716652136006974, |
| "grad_norm": 6.125814437866211, |
| "learning_rate": 7.336969628521237e-05, |
| "loss": 0.09, |
| "step": 5670 |
| }, |
| { |
| "epoch": 2.4760244115082823, |
| "grad_norm": 0.6775864958763123, |
| "learning_rate": 7.32721896644501e-05, |
| "loss": 0.0975, |
| "step": 5680 |
| }, |
| { |
| "epoch": 2.4803836094158673, |
| "grad_norm": 0.590700089931488, |
| "learning_rate": 7.317456993210272e-05, |
| "loss": 0.098, |
| "step": 5690 |
| }, |
| { |
| "epoch": 2.4847428073234523, |
| "grad_norm": 2.4274611473083496, |
| "learning_rate": 7.307683756263881e-05, |
| "loss": 0.0749, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.4891020052310373, |
| "grad_norm": 0.6488708257675171, |
| "learning_rate": 7.297899303107441e-05, |
| "loss": 0.0906, |
| "step": 5710 |
| }, |
| { |
| "epoch": 2.4934612031386223, |
| "grad_norm": 0.7550959587097168, |
| "learning_rate": 7.288103681297068e-05, |
| "loss": 0.0891, |
| "step": 5720 |
| }, |
| { |
| "epoch": 2.4978204010462077, |
| "grad_norm": 12.706351280212402, |
| "learning_rate": 7.278296938443166e-05, |
| "loss": 0.0779, |
| "step": 5730 |
| }, |
| { |
| "epoch": 2.5021795989537923, |
| "grad_norm": 0.9015982151031494, |
| "learning_rate": 7.26847912221019e-05, |
| "loss": 0.0884, |
| "step": 5740 |
| }, |
| { |
| "epoch": 2.5065387968613777, |
| "grad_norm": 3.624945640563965, |
| "learning_rate": 7.258650280316415e-05, |
| "loss": 0.0887, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.5108979947689622, |
| "grad_norm": 0.4754033088684082, |
| "learning_rate": 7.248810460533706e-05, |
| "loss": 0.0806, |
| "step": 5760 |
| }, |
| { |
| "epoch": 2.5152571926765477, |
| "grad_norm": 1.9334558248519897, |
| "learning_rate": 7.238959710687282e-05, |
| "loss": 0.0778, |
| "step": 5770 |
| }, |
| { |
| "epoch": 2.5196163905841327, |
| "grad_norm": 1.0764563083648682, |
| "learning_rate": 7.229098078655489e-05, |
| "loss": 0.0887, |
| "step": 5780 |
| }, |
| { |
| "epoch": 2.5239755884917177, |
| "grad_norm": 0.44298532605171204, |
| "learning_rate": 7.219225612369565e-05, |
| "loss": 0.0691, |
| "step": 5790 |
| }, |
| { |
| "epoch": 2.5283347863993026, |
| "grad_norm": 0.8877589106559753, |
| "learning_rate": 7.209342359813404e-05, |
| "loss": 0.0959, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.5326939843068876, |
| "grad_norm": 0.5124678015708923, |
| "learning_rate": 7.199448369023327e-05, |
| "loss": 0.0816, |
| "step": 5810 |
| }, |
| { |
| "epoch": 2.5370531822144726, |
| "grad_norm": 1.4232996702194214, |
| "learning_rate": 7.189543688087845e-05, |
| "loss": 0.0827, |
| "step": 5820 |
| }, |
| { |
| "epoch": 2.5414123801220576, |
| "grad_norm": 0.7989560961723328, |
| "learning_rate": 7.17962836514743e-05, |
| "loss": 0.0919, |
| "step": 5830 |
| }, |
| { |
| "epoch": 2.5457715780296426, |
| "grad_norm": 0.958767831325531, |
| "learning_rate": 7.169702448394279e-05, |
| "loss": 0.0777, |
| "step": 5840 |
| }, |
| { |
| "epoch": 2.5501307759372276, |
| "grad_norm": 0.5822910070419312, |
| "learning_rate": 7.159765986072071e-05, |
| "loss": 0.0823, |
| "step": 5850 |
| }, |
| { |
| "epoch": 2.5544899738448126, |
| "grad_norm": 3.608449935913086, |
| "learning_rate": 7.149819026475751e-05, |
| "loss": 0.0895, |
| "step": 5860 |
| }, |
| { |
| "epoch": 2.5588491717523976, |
| "grad_norm": 1.6009862422943115, |
| "learning_rate": 7.139861617951275e-05, |
| "loss": 0.0869, |
| "step": 5870 |
| }, |
| { |
| "epoch": 2.5632083696599826, |
| "grad_norm": 0.9188637733459473, |
| "learning_rate": 7.129893808895395e-05, |
| "loss": 0.1151, |
| "step": 5880 |
| }, |
| { |
| "epoch": 2.5675675675675675, |
| "grad_norm": 2.6896746158599854, |
| "learning_rate": 7.119915647755404e-05, |
| "loss": 0.0987, |
| "step": 5890 |
| }, |
| { |
| "epoch": 2.5719267654751525, |
| "grad_norm": 11.660469055175781, |
| "learning_rate": 7.109927183028914e-05, |
| "loss": 0.1049, |
| "step": 5900 |
| }, |
| { |
| "epoch": 2.5762859633827375, |
| "grad_norm": 1.580359935760498, |
| "learning_rate": 7.099928463263619e-05, |
| "loss": 0.0894, |
| "step": 5910 |
| }, |
| { |
| "epoch": 2.5806451612903225, |
| "grad_norm": 0.4905918538570404, |
| "learning_rate": 7.08991953705705e-05, |
| "loss": 0.0752, |
| "step": 5920 |
| }, |
| { |
| "epoch": 2.5850043591979075, |
| "grad_norm": 0.6062779426574707, |
| "learning_rate": 7.07990045305635e-05, |
| "loss": 0.0762, |
| "step": 5930 |
| }, |
| { |
| "epoch": 2.5893635571054925, |
| "grad_norm": 0.9602181911468506, |
| "learning_rate": 7.069871259958034e-05, |
| "loss": 0.0864, |
| "step": 5940 |
| }, |
| { |
| "epoch": 2.5937227550130775, |
| "grad_norm": 3.8249127864837646, |
| "learning_rate": 7.059832006507745e-05, |
| "loss": 0.0913, |
| "step": 5950 |
| }, |
| { |
| "epoch": 2.5980819529206625, |
| "grad_norm": 1.6133575439453125, |
| "learning_rate": 7.049782741500028e-05, |
| "loss": 0.0744, |
| "step": 5960 |
| }, |
| { |
| "epoch": 2.6024411508282475, |
| "grad_norm": 1.4505705833435059, |
| "learning_rate": 7.039723513778087e-05, |
| "loss": 0.0862, |
| "step": 5970 |
| }, |
| { |
| "epoch": 2.606800348735833, |
| "grad_norm": 0.8165828585624695, |
| "learning_rate": 7.029654372233544e-05, |
| "loss": 0.0833, |
| "step": 5980 |
| }, |
| { |
| "epoch": 2.6111595466434174, |
| "grad_norm": 1.2085750102996826, |
| "learning_rate": 7.019575365806215e-05, |
| "loss": 0.0985, |
| "step": 5990 |
| }, |
| { |
| "epoch": 2.615518744551003, |
| "grad_norm": 0.4820369482040405, |
| "learning_rate": 7.009486543483858e-05, |
| "loss": 0.1111, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.6198779424585874, |
| "grad_norm": 1.5817570686340332, |
| "learning_rate": 6.999387954301934e-05, |
| "loss": 0.107, |
| "step": 6010 |
| }, |
| { |
| "epoch": 2.624237140366173, |
| "grad_norm": 1.02738356590271, |
| "learning_rate": 6.989279647343388e-05, |
| "loss": 0.0881, |
| "step": 6020 |
| }, |
| { |
| "epoch": 2.6285963382737574, |
| "grad_norm": 3.4080002307891846, |
| "learning_rate": 6.979161671738382e-05, |
| "loss": 0.0849, |
| "step": 6030 |
| }, |
| { |
| "epoch": 2.632955536181343, |
| "grad_norm": 0.4838300347328186, |
| "learning_rate": 6.969034076664085e-05, |
| "loss": 0.0913, |
| "step": 6040 |
| }, |
| { |
| "epoch": 2.637314734088928, |
| "grad_norm": 0.7174921631813049, |
| "learning_rate": 6.958896911344411e-05, |
| "loss": 0.0868, |
| "step": 6050 |
| }, |
| { |
| "epoch": 2.641673931996513, |
| "grad_norm": 0.812843382358551, |
| "learning_rate": 6.948750225049791e-05, |
| "loss": 0.0831, |
| "step": 6060 |
| }, |
| { |
| "epoch": 2.646033129904098, |
| "grad_norm": 1.0127894878387451, |
| "learning_rate": 6.938594067096936e-05, |
| "loss": 0.0861, |
| "step": 6070 |
| }, |
| { |
| "epoch": 2.6503923278116828, |
| "grad_norm": 8.35067367553711, |
| "learning_rate": 6.928428486848587e-05, |
| "loss": 0.0849, |
| "step": 6080 |
| }, |
| { |
| "epoch": 2.6547515257192678, |
| "grad_norm": 0.7862615585327148, |
| "learning_rate": 6.918253533713282e-05, |
| "loss": 0.0807, |
| "step": 6090 |
| }, |
| { |
| "epoch": 2.6591107236268527, |
| "grad_norm": 0.5965198278427124, |
| "learning_rate": 6.908069257145118e-05, |
| "loss": 0.1029, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.6634699215344377, |
| "grad_norm": 1.679243803024292, |
| "learning_rate": 6.897875706643506e-05, |
| "loss": 0.0702, |
| "step": 6110 |
| }, |
| { |
| "epoch": 2.6678291194420227, |
| "grad_norm": 0.9817731976509094, |
| "learning_rate": 6.887672931752927e-05, |
| "loss": 0.0817, |
| "step": 6120 |
| }, |
| { |
| "epoch": 2.6721883173496077, |
| "grad_norm": 0.6078197360038757, |
| "learning_rate": 6.877460982062706e-05, |
| "loss": 0.0705, |
| "step": 6130 |
| }, |
| { |
| "epoch": 2.6765475152571927, |
| "grad_norm": 2.130326271057129, |
| "learning_rate": 6.86723990720675e-05, |
| "loss": 0.0907, |
| "step": 6140 |
| }, |
| { |
| "epoch": 2.6809067131647777, |
| "grad_norm": 0.932861328125, |
| "learning_rate": 6.857009756863326e-05, |
| "loss": 0.0974, |
| "step": 6150 |
| }, |
| { |
| "epoch": 2.6852659110723627, |
| "grad_norm": 0.7639493346214294, |
| "learning_rate": 6.846770580754807e-05, |
| "loss": 0.1037, |
| "step": 6160 |
| }, |
| { |
| "epoch": 2.6896251089799477, |
| "grad_norm": 0.9204385280609131, |
| "learning_rate": 6.836522428647438e-05, |
| "loss": 0.0859, |
| "step": 6170 |
| }, |
| { |
| "epoch": 2.6939843068875327, |
| "grad_norm": 0.6915794014930725, |
| "learning_rate": 6.826265350351083e-05, |
| "loss": 0.0664, |
| "step": 6180 |
| }, |
| { |
| "epoch": 2.6983435047951176, |
| "grad_norm": 0.9047311544418335, |
| "learning_rate": 6.815999395719e-05, |
| "loss": 0.105, |
| "step": 6190 |
| }, |
| { |
| "epoch": 2.7027027027027026, |
| "grad_norm": 0.9636686444282532, |
| "learning_rate": 6.805724614647586e-05, |
| "loss": 0.072, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.7070619006102876, |
| "grad_norm": 0.9881836175918579, |
| "learning_rate": 6.795441057076136e-05, |
| "loss": 0.0813, |
| "step": 6210 |
| }, |
| { |
| "epoch": 2.7114210985178726, |
| "grad_norm": 0.8798909187316895, |
| "learning_rate": 6.785148772986603e-05, |
| "loss": 0.0811, |
| "step": 6220 |
| }, |
| { |
| "epoch": 2.7157802964254576, |
| "grad_norm": 2.17606520652771, |
| "learning_rate": 6.774847812403355e-05, |
| "loss": 0.0894, |
| "step": 6230 |
| }, |
| { |
| "epoch": 2.7201394943330426, |
| "grad_norm": 1.370004415512085, |
| "learning_rate": 6.76453822539293e-05, |
| "loss": 0.0858, |
| "step": 6240 |
| }, |
| { |
| "epoch": 2.7244986922406276, |
| "grad_norm": 0.4295946955680847, |
| "learning_rate": 6.754220062063793e-05, |
| "loss": 0.0846, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.7288578901482126, |
| "grad_norm": 0.9039126634597778, |
| "learning_rate": 6.743893372566099e-05, |
| "loss": 0.0762, |
| "step": 6260 |
| }, |
| { |
| "epoch": 2.733217088055798, |
| "grad_norm": 0.831985592842102, |
| "learning_rate": 6.733558207091434e-05, |
| "loss": 0.0763, |
| "step": 6270 |
| }, |
| { |
| "epoch": 2.7375762859633825, |
| "grad_norm": 0.4425466060638428, |
| "learning_rate": 6.723214615872585e-05, |
| "loss": 0.0798, |
| "step": 6280 |
| }, |
| { |
| "epoch": 2.741935483870968, |
| "grad_norm": 0.5437725782394409, |
| "learning_rate": 6.712862649183295e-05, |
| "loss": 0.0709, |
| "step": 6290 |
| }, |
| { |
| "epoch": 2.7462946817785525, |
| "grad_norm": 0.663580060005188, |
| "learning_rate": 6.70250235733801e-05, |
| "loss": 0.0847, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.750653879686138, |
| "grad_norm": 1.1464177370071411, |
| "learning_rate": 6.692133790691639e-05, |
| "loss": 0.0665, |
| "step": 6310 |
| }, |
| { |
| "epoch": 2.7550130775937225, |
| "grad_norm": 0.781203031539917, |
| "learning_rate": 6.681756999639311e-05, |
| "loss": 0.074, |
| "step": 6320 |
| }, |
| { |
| "epoch": 2.759372275501308, |
| "grad_norm": 0.7381405830383301, |
| "learning_rate": 6.671372034616132e-05, |
| "loss": 0.0841, |
| "step": 6330 |
| }, |
| { |
| "epoch": 2.763731473408893, |
| "grad_norm": 0.7967920899391174, |
| "learning_rate": 6.660978946096933e-05, |
| "loss": 0.0954, |
| "step": 6340 |
| }, |
| { |
| "epoch": 2.768090671316478, |
| "grad_norm": 1.6040399074554443, |
| "learning_rate": 6.650577784596026e-05, |
| "loss": 0.0774, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.772449869224063, |
| "grad_norm": 0.9123194217681885, |
| "learning_rate": 6.640168600666967e-05, |
| "loss": 0.0835, |
| "step": 6360 |
| }, |
| { |
| "epoch": 2.776809067131648, |
| "grad_norm": 1.2217650413513184, |
| "learning_rate": 6.629751444902299e-05, |
| "loss": 0.0717, |
| "step": 6370 |
| }, |
| { |
| "epoch": 2.781168265039233, |
| "grad_norm": 0.8930469751358032, |
| "learning_rate": 6.619326367933312e-05, |
| "loss": 0.0708, |
| "step": 6380 |
| }, |
| { |
| "epoch": 2.785527462946818, |
| "grad_norm": 0.5414270162582397, |
| "learning_rate": 6.608893420429798e-05, |
| "loss": 0.0749, |
| "step": 6390 |
| }, |
| { |
| "epoch": 2.789886660854403, |
| "grad_norm": 0.9353501200675964, |
| "learning_rate": 6.598452653099803e-05, |
| "loss": 0.0801, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.794245858761988, |
| "grad_norm": 0.48062050342559814, |
| "learning_rate": 6.588004116689375e-05, |
| "loss": 0.0631, |
| "step": 6410 |
| }, |
| { |
| "epoch": 2.798605056669573, |
| "grad_norm": 0.7349405288696289, |
| "learning_rate": 6.57754786198233e-05, |
| "loss": 0.0794, |
| "step": 6420 |
| }, |
| { |
| "epoch": 2.802964254577158, |
| "grad_norm": 0.48554134368896484, |
| "learning_rate": 6.567083939799992e-05, |
| "loss": 0.07, |
| "step": 6430 |
| }, |
| { |
| "epoch": 2.807323452484743, |
| "grad_norm": 0.5531144738197327, |
| "learning_rate": 6.556612401000954e-05, |
| "loss": 0.0673, |
| "step": 6440 |
| }, |
| { |
| "epoch": 2.811682650392328, |
| "grad_norm": 0.40997496247291565, |
| "learning_rate": 6.54613329648083e-05, |
| "loss": 0.0732, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.8160418482999128, |
| "grad_norm": 0.8054825663566589, |
| "learning_rate": 6.535646677172005e-05, |
| "loss": 0.0656, |
| "step": 6460 |
| }, |
| { |
| "epoch": 2.8204010462074978, |
| "grad_norm": 4.4836554527282715, |
| "learning_rate": 6.52515259404339e-05, |
| "loss": 0.0751, |
| "step": 6470 |
| }, |
| { |
| "epoch": 2.8247602441150828, |
| "grad_norm": 0.6638883352279663, |
| "learning_rate": 6.514651098100167e-05, |
| "loss": 0.0623, |
| "step": 6480 |
| }, |
| { |
| "epoch": 2.8291194420226677, |
| "grad_norm": 0.8358332514762878, |
| "learning_rate": 6.504142240383555e-05, |
| "loss": 0.0911, |
| "step": 6490 |
| }, |
| { |
| "epoch": 2.8334786399302527, |
| "grad_norm": 0.8474389910697937, |
| "learning_rate": 6.493626071970549e-05, |
| "loss": 0.0609, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.8378378378378377, |
| "grad_norm": 1.1841853857040405, |
| "learning_rate": 6.483102643973682e-05, |
| "loss": 0.066, |
| "step": 6510 |
| }, |
| { |
| "epoch": 2.8421970357454227, |
| "grad_norm": 0.5661811828613281, |
| "learning_rate": 6.472572007540764e-05, |
| "loss": 0.07, |
| "step": 6520 |
| }, |
| { |
| "epoch": 2.8465562336530077, |
| "grad_norm": 0.6846306920051575, |
| "learning_rate": 6.462034213854645e-05, |
| "loss": 0.0701, |
| "step": 6530 |
| }, |
| { |
| "epoch": 2.850915431560593, |
| "grad_norm": 0.5802059769630432, |
| "learning_rate": 6.451489314132962e-05, |
| "loss": 0.0755, |
| "step": 6540 |
| }, |
| { |
| "epoch": 2.8552746294681777, |
| "grad_norm": 0.7946178913116455, |
| "learning_rate": 6.440937359627893e-05, |
| "loss": 0.0674, |
| "step": 6550 |
| }, |
| { |
| "epoch": 2.859633827375763, |
| "grad_norm": 0.34522873163223267, |
| "learning_rate": 6.430378401625894e-05, |
| "loss": 0.0735, |
| "step": 6560 |
| }, |
| { |
| "epoch": 2.8639930252833476, |
| "grad_norm": 0.5881823897361755, |
| "learning_rate": 6.419812491447472e-05, |
| "loss": 0.0771, |
| "step": 6570 |
| }, |
| { |
| "epoch": 2.868352223190933, |
| "grad_norm": 2.1091787815093994, |
| "learning_rate": 6.409239680446919e-05, |
| "loss": 0.0712, |
| "step": 6580 |
| }, |
| { |
| "epoch": 2.8727114210985176, |
| "grad_norm": 0.9964777827262878, |
| "learning_rate": 6.398660020012072e-05, |
| "loss": 0.0804, |
| "step": 6590 |
| }, |
| { |
| "epoch": 2.877070619006103, |
| "grad_norm": 0.7100098729133606, |
| "learning_rate": 6.38807356156405e-05, |
| "loss": 0.0785, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.881429816913688, |
| "grad_norm": 0.7806901931762695, |
| "learning_rate": 6.377480356557022e-05, |
| "loss": 0.0603, |
| "step": 6610 |
| }, |
| { |
| "epoch": 2.885789014821273, |
| "grad_norm": 0.6067240834236145, |
| "learning_rate": 6.366880456477942e-05, |
| "loss": 0.0683, |
| "step": 6620 |
| }, |
| { |
| "epoch": 2.890148212728858, |
| "grad_norm": 0.9816045761108398, |
| "learning_rate": 6.356273912846312e-05, |
| "loss": 0.0696, |
| "step": 6630 |
| }, |
| { |
| "epoch": 2.894507410636443, |
| "grad_norm": 0.5616809129714966, |
| "learning_rate": 6.34566077721391e-05, |
| "loss": 0.0605, |
| "step": 6640 |
| }, |
| { |
| "epoch": 2.898866608544028, |
| "grad_norm": 0.5172761082649231, |
| "learning_rate": 6.335041101164569e-05, |
| "loss": 0.0747, |
| "step": 6650 |
| }, |
| { |
| "epoch": 2.903225806451613, |
| "grad_norm": 0.6442545056343079, |
| "learning_rate": 6.324414936313904e-05, |
| "loss": 0.0581, |
| "step": 6660 |
| }, |
| { |
| "epoch": 2.907585004359198, |
| "grad_norm": 0.48619189858436584, |
| "learning_rate": 6.313782334309066e-05, |
| "loss": 0.0597, |
| "step": 6670 |
| }, |
| { |
| "epoch": 2.911944202266783, |
| "grad_norm": 0.8751986026763916, |
| "learning_rate": 6.303143346828499e-05, |
| "loss": 0.0697, |
| "step": 6680 |
| }, |
| { |
| "epoch": 2.916303400174368, |
| "grad_norm": 0.6420536041259766, |
| "learning_rate": 6.292498025581674e-05, |
| "loss": 0.0595, |
| "step": 6690 |
| }, |
| { |
| "epoch": 2.920662598081953, |
| "grad_norm": 0.6701872944831848, |
| "learning_rate": 6.281846422308857e-05, |
| "loss": 0.0676, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.925021795989538, |
| "grad_norm": 0.7736986875534058, |
| "learning_rate": 6.271188588780839e-05, |
| "loss": 0.0762, |
| "step": 6710 |
| }, |
| { |
| "epoch": 2.929380993897123, |
| "grad_norm": 1.2286581993103027, |
| "learning_rate": 6.260524576798694e-05, |
| "loss": 0.08, |
| "step": 6720 |
| }, |
| { |
| "epoch": 2.933740191804708, |
| "grad_norm": 0.6742132902145386, |
| "learning_rate": 6.249854438193528e-05, |
| "loss": 0.0707, |
| "step": 6730 |
| }, |
| { |
| "epoch": 2.938099389712293, |
| "grad_norm": 0.7686195373535156, |
| "learning_rate": 6.239178224826224e-05, |
| "loss": 0.0852, |
| "step": 6740 |
| }, |
| { |
| "epoch": 2.942458587619878, |
| "grad_norm": 0.44351834058761597, |
| "learning_rate": 6.228495988587188e-05, |
| "loss": 0.0678, |
| "step": 6750 |
| }, |
| { |
| "epoch": 2.946817785527463, |
| "grad_norm": 0.33258873224258423, |
| "learning_rate": 6.217807781396106e-05, |
| "loss": 0.0656, |
| "step": 6760 |
| }, |
| { |
| "epoch": 2.951176983435048, |
| "grad_norm": 0.9385144114494324, |
| "learning_rate": 6.207113655201676e-05, |
| "loss": 0.0641, |
| "step": 6770 |
| }, |
| { |
| "epoch": 2.955536181342633, |
| "grad_norm": 0.7347838878631592, |
| "learning_rate": 6.196413661981368e-05, |
| "loss": 0.073, |
| "step": 6780 |
| }, |
| { |
| "epoch": 2.959895379250218, |
| "grad_norm": 1.1833434104919434, |
| "learning_rate": 6.185707853741175e-05, |
| "loss": 0.0691, |
| "step": 6790 |
| }, |
| { |
| "epoch": 2.964254577157803, |
| "grad_norm": 0.773758590221405, |
| "learning_rate": 6.174996282515344e-05, |
| "loss": 0.0668, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.968613775065388, |
| "grad_norm": 0.6628344655036926, |
| "learning_rate": 6.164279000366131e-05, |
| "loss": 0.0596, |
| "step": 6810 |
| }, |
| { |
| "epoch": 2.972972972972973, |
| "grad_norm": 0.7836283445358276, |
| "learning_rate": 6.153556059383561e-05, |
| "loss": 0.0774, |
| "step": 6820 |
| }, |
| { |
| "epoch": 2.9773321708805582, |
| "grad_norm": 0.6452964544296265, |
| "learning_rate": 6.142827511685152e-05, |
| "loss": 0.0654, |
| "step": 6830 |
| }, |
| { |
| "epoch": 2.981691368788143, |
| "grad_norm": 1.0415467023849487, |
| "learning_rate": 6.132093409415678e-05, |
| "loss": 0.0754, |
| "step": 6840 |
| }, |
| { |
| "epoch": 2.986050566695728, |
| "grad_norm": 2.455578088760376, |
| "learning_rate": 6.121353804746907e-05, |
| "loss": 0.0672, |
| "step": 6850 |
| }, |
| { |
| "epoch": 2.9904097646033128, |
| "grad_norm": 0.8278747797012329, |
| "learning_rate": 6.110608749877352e-05, |
| "loss": 0.0627, |
| "step": 6860 |
| }, |
| { |
| "epoch": 2.994768962510898, |
| "grad_norm": 1.0058568716049194, |
| "learning_rate": 6.0998582970320205e-05, |
| "loss": 0.089, |
| "step": 6870 |
| }, |
| { |
| "epoch": 2.9991281604184827, |
| "grad_norm": 0.38594409823417664, |
| "learning_rate": 6.0891024984621506e-05, |
| "loss": 0.0785, |
| "step": 6880 |
| }, |
| { |
| "epoch": 3.003487358326068, |
| "grad_norm": 0.6081041097640991, |
| "learning_rate": 6.078341406444961e-05, |
| "loss": 0.068, |
| "step": 6890 |
| }, |
| { |
| "epoch": 3.007846556233653, |
| "grad_norm": 0.5051882863044739, |
| "learning_rate": 6.067575073283405e-05, |
| "loss": 0.077, |
| "step": 6900 |
| }, |
| { |
| "epoch": 3.012205754141238, |
| "grad_norm": 0.813621461391449, |
| "learning_rate": 6.0568035513059073e-05, |
| "loss": 0.0561, |
| "step": 6910 |
| }, |
| { |
| "epoch": 3.016564952048823, |
| "grad_norm": 1.0698422193527222, |
| "learning_rate": 6.046026892866109e-05, |
| "loss": 0.0809, |
| "step": 6920 |
| }, |
| { |
| "epoch": 3.020924149956408, |
| "grad_norm": 0.8250692486763, |
| "learning_rate": 6.0352451503426214e-05, |
| "loss": 0.0737, |
| "step": 6930 |
| }, |
| { |
| "epoch": 3.025283347863993, |
| "grad_norm": 0.6636641621589661, |
| "learning_rate": 6.024458376138762e-05, |
| "loss": 0.0526, |
| "step": 6940 |
| }, |
| { |
| "epoch": 3.029642545771578, |
| "grad_norm": 2.4719927310943604, |
| "learning_rate": 6.013666622682306e-05, |
| "loss": 0.0587, |
| "step": 6950 |
| }, |
| { |
| "epoch": 3.034001743679163, |
| "grad_norm": 0.7209786176681519, |
| "learning_rate": 6.002869942425231e-05, |
| "loss": 0.0616, |
| "step": 6960 |
| }, |
| { |
| "epoch": 3.038360941586748, |
| "grad_norm": 0.8328136801719666, |
| "learning_rate": 5.992068387843459e-05, |
| "loss": 0.0669, |
| "step": 6970 |
| }, |
| { |
| "epoch": 3.042720139494333, |
| "grad_norm": 1.0429414510726929, |
| "learning_rate": 5.981262011436603e-05, |
| "loss": 0.0752, |
| "step": 6980 |
| }, |
| { |
| "epoch": 3.047079337401918, |
| "grad_norm": 0.6350201368331909, |
| "learning_rate": 5.970450865727712e-05, |
| "loss": 0.0588, |
| "step": 6990 |
| }, |
| { |
| "epoch": 3.051438535309503, |
| "grad_norm": 0.8136351704597473, |
| "learning_rate": 5.9596350032630156e-05, |
| "loss": 0.0625, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.055797733217088, |
| "grad_norm": 0.6018245816230774, |
| "learning_rate": 5.9488144766116714e-05, |
| "loss": 0.0629, |
| "step": 7010 |
| }, |
| { |
| "epoch": 3.060156931124673, |
| "grad_norm": 1.1054131984710693, |
| "learning_rate": 5.9379893383655006e-05, |
| "loss": 0.0613, |
| "step": 7020 |
| }, |
| { |
| "epoch": 3.064516129032258, |
| "grad_norm": 0.5404367446899414, |
| "learning_rate": 5.927159641138744e-05, |
| "loss": 0.0836, |
| "step": 7030 |
| }, |
| { |
| "epoch": 3.068875326939843, |
| "grad_norm": 0.3001430332660675, |
| "learning_rate": 5.916325437567799e-05, |
| "loss": 0.0697, |
| "step": 7040 |
| }, |
| { |
| "epoch": 3.073234524847428, |
| "grad_norm": 0.9972290396690369, |
| "learning_rate": 5.905486780310966e-05, |
| "loss": 0.0591, |
| "step": 7050 |
| }, |
| { |
| "epoch": 3.077593722755013, |
| "grad_norm": 1.5061616897583008, |
| "learning_rate": 5.8946437220481887e-05, |
| "loss": 0.0655, |
| "step": 7060 |
| }, |
| { |
| "epoch": 3.081952920662598, |
| "grad_norm": 1.0841275453567505, |
| "learning_rate": 5.883796315480805e-05, |
| "loss": 0.0667, |
| "step": 7070 |
| }, |
| { |
| "epoch": 3.086312118570183, |
| "grad_norm": 0.5677183270454407, |
| "learning_rate": 5.872944613331288e-05, |
| "loss": 0.0833, |
| "step": 7080 |
| }, |
| { |
| "epoch": 3.090671316477768, |
| "grad_norm": 0.5351662039756775, |
| "learning_rate": 5.862088668342986e-05, |
| "loss": 0.0667, |
| "step": 7090 |
| }, |
| { |
| "epoch": 3.095030514385353, |
| "grad_norm": 0.8425320982933044, |
| "learning_rate": 5.8512285332798714e-05, |
| "loss": 0.0604, |
| "step": 7100 |
| }, |
| { |
| "epoch": 3.099389712292938, |
| "grad_norm": 1.6601287126541138, |
| "learning_rate": 5.840364260926277e-05, |
| "loss": 0.0831, |
| "step": 7110 |
| }, |
| { |
| "epoch": 3.103748910200523, |
| "grad_norm": 0.8863855004310608, |
| "learning_rate": 5.8294959040866505e-05, |
| "loss": 0.0737, |
| "step": 7120 |
| }, |
| { |
| "epoch": 3.108108108108108, |
| "grad_norm": 0.8012006282806396, |
| "learning_rate": 5.818623515585292e-05, |
| "loss": 0.0662, |
| "step": 7130 |
| }, |
| { |
| "epoch": 3.1124673060156933, |
| "grad_norm": 5.165378570556641, |
| "learning_rate": 5.8077471482660896e-05, |
| "loss": 0.0806, |
| "step": 7140 |
| }, |
| { |
| "epoch": 3.1168265039232783, |
| "grad_norm": 1.3551973104476929, |
| "learning_rate": 5.796866854992276e-05, |
| "loss": 0.0844, |
| "step": 7150 |
| }, |
| { |
| "epoch": 3.1211857018308633, |
| "grad_norm": 0.6418676972389221, |
| "learning_rate": 5.7859826886461676e-05, |
| "loss": 0.0722, |
| "step": 7160 |
| }, |
| { |
| "epoch": 3.1255448997384483, |
| "grad_norm": 7.927807807922363, |
| "learning_rate": 5.775094702128899e-05, |
| "loss": 0.0773, |
| "step": 7170 |
| }, |
| { |
| "epoch": 3.1299040976460333, |
| "grad_norm": 0.8966444134712219, |
| "learning_rate": 5.7642029483601746e-05, |
| "loss": 0.0819, |
| "step": 7180 |
| }, |
| { |
| "epoch": 3.1342632955536183, |
| "grad_norm": 0.7477391362190247, |
| "learning_rate": 5.753307480278012e-05, |
| "loss": 0.0795, |
| "step": 7190 |
| }, |
| { |
| "epoch": 3.1386224934612033, |
| "grad_norm": 1.0312063694000244, |
| "learning_rate": 5.742408350838478e-05, |
| "loss": 0.0789, |
| "step": 7200 |
| }, |
| { |
| "epoch": 3.1429816913687882, |
| "grad_norm": 1.2121704816818237, |
| "learning_rate": 5.7315056130154374e-05, |
| "loss": 0.0768, |
| "step": 7210 |
| }, |
| { |
| "epoch": 3.1473408892763732, |
| "grad_norm": 0.6697542667388916, |
| "learning_rate": 5.720599319800292e-05, |
| "loss": 0.0704, |
| "step": 7220 |
| }, |
| { |
| "epoch": 3.151700087183958, |
| "grad_norm": 0.5981730818748474, |
| "learning_rate": 5.709689524201722e-05, |
| "loss": 0.069, |
| "step": 7230 |
| }, |
| { |
| "epoch": 3.156059285091543, |
| "grad_norm": 0.5300177931785583, |
| "learning_rate": 5.698776279245437e-05, |
| "loss": 0.0717, |
| "step": 7240 |
| }, |
| { |
| "epoch": 3.160418482999128, |
| "grad_norm": 0.4018495976924896, |
| "learning_rate": 5.6878596379739036e-05, |
| "loss": 0.0665, |
| "step": 7250 |
| }, |
| { |
| "epoch": 3.164777680906713, |
| "grad_norm": 2.6015779972076416, |
| "learning_rate": 5.676939653446103e-05, |
| "loss": 0.0604, |
| "step": 7260 |
| }, |
| { |
| "epoch": 3.169136878814298, |
| "grad_norm": 2.2606658935546875, |
| "learning_rate": 5.666016378737261e-05, |
| "loss": 0.0546, |
| "step": 7270 |
| }, |
| { |
| "epoch": 3.173496076721883, |
| "grad_norm": 0.53534996509552, |
| "learning_rate": 5.655089866938596e-05, |
| "loss": 0.0659, |
| "step": 7280 |
| }, |
| { |
| "epoch": 3.177855274629468, |
| "grad_norm": 0.6438631415367126, |
| "learning_rate": 5.6441601711570615e-05, |
| "loss": 0.0742, |
| "step": 7290 |
| }, |
| { |
| "epoch": 3.182214472537053, |
| "grad_norm": 0.7055009007453918, |
| "learning_rate": 5.633227344515085e-05, |
| "loss": 0.0702, |
| "step": 7300 |
| }, |
| { |
| "epoch": 3.186573670444638, |
| "grad_norm": 0.48063281178474426, |
| "learning_rate": 5.6222914401503116e-05, |
| "loss": 0.0529, |
| "step": 7310 |
| }, |
| { |
| "epoch": 3.190932868352223, |
| "grad_norm": 0.5260463356971741, |
| "learning_rate": 5.611352511215343e-05, |
| "loss": 0.0467, |
| "step": 7320 |
| }, |
| { |
| "epoch": 3.195292066259808, |
| "grad_norm": 1.3753858804702759, |
| "learning_rate": 5.600410610877488e-05, |
| "loss": 0.0662, |
| "step": 7330 |
| }, |
| { |
| "epoch": 3.199651264167393, |
| "grad_norm": 0.5484173893928528, |
| "learning_rate": 5.58946579231849e-05, |
| "loss": 0.0586, |
| "step": 7340 |
| }, |
| { |
| "epoch": 3.204010462074978, |
| "grad_norm": 0.7811186909675598, |
| "learning_rate": 5.578518108734279e-05, |
| "loss": 0.0744, |
| "step": 7350 |
| }, |
| { |
| "epoch": 3.208369659982563, |
| "grad_norm": 0.7245134711265564, |
| "learning_rate": 5.5675676133347096e-05, |
| "loss": 0.0878, |
| "step": 7360 |
| }, |
| { |
| "epoch": 3.212728857890148, |
| "grad_norm": 0.5464441776275635, |
| "learning_rate": 5.556614359343307e-05, |
| "loss": 0.0681, |
| "step": 7370 |
| }, |
| { |
| "epoch": 3.217088055797733, |
| "grad_norm": 1.7045855522155762, |
| "learning_rate": 5.545658399996999e-05, |
| "loss": 0.0709, |
| "step": 7380 |
| }, |
| { |
| "epoch": 3.221447253705318, |
| "grad_norm": 0.6500688791275024, |
| "learning_rate": 5.534699788545862e-05, |
| "loss": 0.0728, |
| "step": 7390 |
| }, |
| { |
| "epoch": 3.225806451612903, |
| "grad_norm": 16.512657165527344, |
| "learning_rate": 5.523738578252867e-05, |
| "loss": 0.0713, |
| "step": 7400 |
| }, |
| { |
| "epoch": 3.2301656495204885, |
| "grad_norm": 0.5367026329040527, |
| "learning_rate": 5.512774822393614e-05, |
| "loss": 0.0689, |
| "step": 7410 |
| }, |
| { |
| "epoch": 3.234524847428073, |
| "grad_norm": 5.1868181228637695, |
| "learning_rate": 5.5018085742560744e-05, |
| "loss": 0.0924, |
| "step": 7420 |
| }, |
| { |
| "epoch": 3.2388840453356584, |
| "grad_norm": 0.6594675779342651, |
| "learning_rate": 5.4908398871403365e-05, |
| "loss": 0.0745, |
| "step": 7430 |
| }, |
| { |
| "epoch": 3.2432432432432434, |
| "grad_norm": 2.2696409225463867, |
| "learning_rate": 5.4798688143583375e-05, |
| "loss": 0.0768, |
| "step": 7440 |
| }, |
| { |
| "epoch": 3.2476024411508284, |
| "grad_norm": 0.6064624190330505, |
| "learning_rate": 5.468895409233615e-05, |
| "loss": 0.0701, |
| "step": 7450 |
| }, |
| { |
| "epoch": 3.2519616390584134, |
| "grad_norm": 0.9498799443244934, |
| "learning_rate": 5.4579197251010414e-05, |
| "loss": 0.0661, |
| "step": 7460 |
| }, |
| { |
| "epoch": 3.2563208369659984, |
| "grad_norm": 0.8516326546669006, |
| "learning_rate": 5.446941815306563e-05, |
| "loss": 0.0715, |
| "step": 7470 |
| }, |
| { |
| "epoch": 3.2606800348735834, |
| "grad_norm": 0.7903364300727844, |
| "learning_rate": 5.435961733206947e-05, |
| "loss": 0.0675, |
| "step": 7480 |
| }, |
| { |
| "epoch": 3.2650392327811684, |
| "grad_norm": 0.40599560737609863, |
| "learning_rate": 5.424979532169516e-05, |
| "loss": 0.0683, |
| "step": 7490 |
| }, |
| { |
| "epoch": 3.2693984306887534, |
| "grad_norm": 0.4230225086212158, |
| "learning_rate": 5.413995265571895e-05, |
| "loss": 0.0585, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.2737576285963383, |
| "grad_norm": 11.551130294799805, |
| "learning_rate": 5.403008986801746e-05, |
| "loss": 0.0713, |
| "step": 7510 |
| }, |
| { |
| "epoch": 3.2781168265039233, |
| "grad_norm": 0.5420308113098145, |
| "learning_rate": 5.3920207492565114e-05, |
| "loss": 0.0618, |
| "step": 7520 |
| }, |
| { |
| "epoch": 3.2824760244115083, |
| "grad_norm": 3.584078073501587, |
| "learning_rate": 5.381030606343154e-05, |
| "loss": 0.0558, |
| "step": 7530 |
| }, |
| { |
| "epoch": 3.2868352223190933, |
| "grad_norm": 1.0643659830093384, |
| "learning_rate": 5.370038611477894e-05, |
| "loss": 0.0715, |
| "step": 7540 |
| }, |
| { |
| "epoch": 3.2911944202266783, |
| "grad_norm": 0.557769775390625, |
| "learning_rate": 5.359044818085963e-05, |
| "loss": 0.0676, |
| "step": 7550 |
| }, |
| { |
| "epoch": 3.2955536181342633, |
| "grad_norm": 2.791323184967041, |
| "learning_rate": 5.3480492796013214e-05, |
| "loss": 0.0694, |
| "step": 7560 |
| }, |
| { |
| "epoch": 3.2999128160418483, |
| "grad_norm": 0.6536844372749329, |
| "learning_rate": 5.33705204946642e-05, |
| "loss": 0.0604, |
| "step": 7570 |
| }, |
| { |
| "epoch": 3.3042720139494333, |
| "grad_norm": 0.44391271471977234, |
| "learning_rate": 5.326053181131927e-05, |
| "loss": 0.0781, |
| "step": 7580 |
| }, |
| { |
| "epoch": 3.3086312118570183, |
| "grad_norm": 2.1630494594573975, |
| "learning_rate": 5.3150527280564776e-05, |
| "loss": 0.0801, |
| "step": 7590 |
| }, |
| { |
| "epoch": 3.3129904097646032, |
| "grad_norm": 3.6389074325561523, |
| "learning_rate": 5.3040507437064034e-05, |
| "loss": 0.0702, |
| "step": 7600 |
| }, |
| { |
| "epoch": 3.3173496076721882, |
| "grad_norm": 3.0503857135772705, |
| "learning_rate": 5.293047281555482e-05, |
| "loss": 0.0642, |
| "step": 7610 |
| }, |
| { |
| "epoch": 3.321708805579773, |
| "grad_norm": 0.6594390869140625, |
| "learning_rate": 5.2820423950846765e-05, |
| "loss": 0.0749, |
| "step": 7620 |
| }, |
| { |
| "epoch": 3.326068003487358, |
| "grad_norm": 2.7960643768310547, |
| "learning_rate": 5.2710361377818696e-05, |
| "loss": 0.082, |
| "step": 7630 |
| }, |
| { |
| "epoch": 3.330427201394943, |
| "grad_norm": 5.0908894538879395, |
| "learning_rate": 5.2600285631416026e-05, |
| "loss": 0.0732, |
| "step": 7640 |
| }, |
| { |
| "epoch": 3.334786399302528, |
| "grad_norm": 1.8586153984069824, |
| "learning_rate": 5.249019724664826e-05, |
| "loss": 0.0621, |
| "step": 7650 |
| }, |
| { |
| "epoch": 3.339145597210113, |
| "grad_norm": 2.7996575832366943, |
| "learning_rate": 5.2380096758586315e-05, |
| "loss": 0.0737, |
| "step": 7660 |
| }, |
| { |
| "epoch": 3.343504795117698, |
| "grad_norm": 9.720712661743164, |
| "learning_rate": 5.226998470235993e-05, |
| "loss": 0.0774, |
| "step": 7670 |
| }, |
| { |
| "epoch": 3.3478639930252836, |
| "grad_norm": 0.7286834120750427, |
| "learning_rate": 5.215986161315507e-05, |
| "loss": 0.0827, |
| "step": 7680 |
| }, |
| { |
| "epoch": 3.352223190932868, |
| "grad_norm": 0.6988757848739624, |
| "learning_rate": 5.20497280262113e-05, |
| "loss": 0.0679, |
| "step": 7690 |
| }, |
| { |
| "epoch": 3.3565823888404536, |
| "grad_norm": 0.6899250149726868, |
| "learning_rate": 5.193958447681924e-05, |
| "loss": 0.0618, |
| "step": 7700 |
| }, |
| { |
| "epoch": 3.360941586748038, |
| "grad_norm": 0.8897220492362976, |
| "learning_rate": 5.182943150031793e-05, |
| "loss": 0.0766, |
| "step": 7710 |
| }, |
| { |
| "epoch": 3.3653007846556235, |
| "grad_norm": 0.7581049799919128, |
| "learning_rate": 5.1719269632092204e-05, |
| "loss": 0.0819, |
| "step": 7720 |
| }, |
| { |
| "epoch": 3.3696599825632085, |
| "grad_norm": 0.6817976832389832, |
| "learning_rate": 5.160909940757015e-05, |
| "loss": 0.0769, |
| "step": 7730 |
| }, |
| { |
| "epoch": 3.3740191804707935, |
| "grad_norm": 0.8774738311767578, |
| "learning_rate": 5.149892136222043e-05, |
| "loss": 0.0631, |
| "step": 7740 |
| }, |
| { |
| "epoch": 3.3783783783783785, |
| "grad_norm": 0.7809702157974243, |
| "learning_rate": 5.1388736031549744e-05, |
| "loss": 0.0583, |
| "step": 7750 |
| }, |
| { |
| "epoch": 3.3827375762859635, |
| "grad_norm": 1.1148380041122437, |
| "learning_rate": 5.127854395110021e-05, |
| "loss": 0.0675, |
| "step": 7760 |
| }, |
| { |
| "epoch": 3.3870967741935485, |
| "grad_norm": 0.7259299159049988, |
| "learning_rate": 5.116834565644671e-05, |
| "loss": 0.0636, |
| "step": 7770 |
| }, |
| { |
| "epoch": 3.3914559721011335, |
| "grad_norm": 0.5108294486999512, |
| "learning_rate": 5.10581416831944e-05, |
| "loss": 0.0588, |
| "step": 7780 |
| }, |
| { |
| "epoch": 3.3958151700087185, |
| "grad_norm": 0.6647517681121826, |
| "learning_rate": 5.094793256697593e-05, |
| "loss": 0.0606, |
| "step": 7790 |
| }, |
| { |
| "epoch": 3.4001743679163035, |
| "grad_norm": 0.4952324330806732, |
| "learning_rate": 5.0837718843449075e-05, |
| "loss": 0.062, |
| "step": 7800 |
| }, |
| { |
| "epoch": 3.4045335658238884, |
| "grad_norm": 3.1278605461120605, |
| "learning_rate": 5.07275010482939e-05, |
| "loss": 0.0645, |
| "step": 7810 |
| }, |
| { |
| "epoch": 3.4088927637314734, |
| "grad_norm": 1.1586763858795166, |
| "learning_rate": 5.061727971721032e-05, |
| "loss": 0.0551, |
| "step": 7820 |
| }, |
| { |
| "epoch": 3.4132519616390584, |
| "grad_norm": 0.4465099275112152, |
| "learning_rate": 5.050705538591538e-05, |
| "loss": 0.0567, |
| "step": 7830 |
| }, |
| { |
| "epoch": 3.4176111595466434, |
| "grad_norm": 0.5040956735610962, |
| "learning_rate": 5.0396828590140785e-05, |
| "loss": 0.0518, |
| "step": 7840 |
| }, |
| { |
| "epoch": 3.4219703574542284, |
| "grad_norm": 0.5282771587371826, |
| "learning_rate": 5.0286599865630157e-05, |
| "loss": 0.0653, |
| "step": 7850 |
| }, |
| { |
| "epoch": 3.4263295553618134, |
| "grad_norm": 0.5432827472686768, |
| "learning_rate": 5.017636974813649e-05, |
| "loss": 0.0698, |
| "step": 7860 |
| }, |
| { |
| "epoch": 3.4306887532693984, |
| "grad_norm": 0.6134181618690491, |
| "learning_rate": 5.006613877341959e-05, |
| "loss": 0.0699, |
| "step": 7870 |
| }, |
| { |
| "epoch": 3.4350479511769834, |
| "grad_norm": 0.490448534488678, |
| "learning_rate": 4.99559074772434e-05, |
| "loss": 0.0621, |
| "step": 7880 |
| }, |
| { |
| "epoch": 3.4394071490845683, |
| "grad_norm": 0.5438141226768494, |
| "learning_rate": 4.9845676395373455e-05, |
| "loss": 0.0551, |
| "step": 7890 |
| }, |
| { |
| "epoch": 3.4437663469921533, |
| "grad_norm": 0.5387142896652222, |
| "learning_rate": 4.9735446063574184e-05, |
| "loss": 0.0668, |
| "step": 7900 |
| }, |
| { |
| "epoch": 3.4481255448997383, |
| "grad_norm": 0.5340880751609802, |
| "learning_rate": 4.962521701760645e-05, |
| "loss": 0.0617, |
| "step": 7910 |
| }, |
| { |
| "epoch": 3.4524847428073233, |
| "grad_norm": 0.6971394419670105, |
| "learning_rate": 4.951498979322482e-05, |
| "loss": 0.0806, |
| "step": 7920 |
| }, |
| { |
| "epoch": 3.4568439407149083, |
| "grad_norm": 0.5765938758850098, |
| "learning_rate": 4.9404764926174996e-05, |
| "loss": 0.0576, |
| "step": 7930 |
| }, |
| { |
| "epoch": 3.4612031386224933, |
| "grad_norm": 0.4569430351257324, |
| "learning_rate": 4.929454295219127e-05, |
| "loss": 0.0558, |
| "step": 7940 |
| }, |
| { |
| "epoch": 3.4655623365300783, |
| "grad_norm": 1.1662955284118652, |
| "learning_rate": 4.9184324406993844e-05, |
| "loss": 0.0517, |
| "step": 7950 |
| }, |
| { |
| "epoch": 3.4699215344376633, |
| "grad_norm": 0.9679137468338013, |
| "learning_rate": 4.907410982628623e-05, |
| "loss": 0.0578, |
| "step": 7960 |
| }, |
| { |
| "epoch": 3.4742807323452487, |
| "grad_norm": 0.47717949748039246, |
| "learning_rate": 4.896389974575273e-05, |
| "loss": 0.0629, |
| "step": 7970 |
| }, |
| { |
| "epoch": 3.4786399302528332, |
| "grad_norm": 1.1076433658599854, |
| "learning_rate": 4.885369470105571e-05, |
| "loss": 0.0611, |
| "step": 7980 |
| }, |
| { |
| "epoch": 3.4829991281604187, |
| "grad_norm": 0.634964644908905, |
| "learning_rate": 4.874349522783313e-05, |
| "loss": 0.06, |
| "step": 7990 |
| }, |
| { |
| "epoch": 3.4873583260680037, |
| "grad_norm": 0.566712498664856, |
| "learning_rate": 4.863330186169581e-05, |
| "loss": 0.058, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.4917175239755887, |
| "grad_norm": 0.4812932312488556, |
| "learning_rate": 4.8523115138224885e-05, |
| "loss": 0.0512, |
| "step": 8010 |
| }, |
| { |
| "epoch": 3.4960767218831736, |
| "grad_norm": 0.5662280917167664, |
| "learning_rate": 4.841293559296928e-05, |
| "loss": 0.058, |
| "step": 8020 |
| }, |
| { |
| "epoch": 3.5004359197907586, |
| "grad_norm": 0.5453780889511108, |
| "learning_rate": 4.830276376144295e-05, |
| "loss": 0.056, |
| "step": 8030 |
| }, |
| { |
| "epoch": 3.5047951176983436, |
| "grad_norm": 0.6273401975631714, |
| "learning_rate": 4.819260017912237e-05, |
| "loss": 0.0438, |
| "step": 8040 |
| }, |
| { |
| "epoch": 3.5091543156059286, |
| "grad_norm": 0.5913659930229187, |
| "learning_rate": 4.808244538144396e-05, |
| "loss": 0.0583, |
| "step": 8050 |
| }, |
| { |
| "epoch": 3.5135135135135136, |
| "grad_norm": 0.46796709299087524, |
| "learning_rate": 4.797229990380142e-05, |
| "loss": 0.0626, |
| "step": 8060 |
| }, |
| { |
| "epoch": 3.5178727114210986, |
| "grad_norm": 0.633021891117096, |
| "learning_rate": 4.786216428154317e-05, |
| "loss": 0.0468, |
| "step": 8070 |
| }, |
| { |
| "epoch": 3.5222319093286836, |
| "grad_norm": 0.8615811467170715, |
| "learning_rate": 4.7752039049969685e-05, |
| "loss": 0.0716, |
| "step": 8080 |
| }, |
| { |
| "epoch": 3.5265911072362686, |
| "grad_norm": 0.5270361304283142, |
| "learning_rate": 4.7641924744330956e-05, |
| "loss": 0.0555, |
| "step": 8090 |
| }, |
| { |
| "epoch": 3.5309503051438536, |
| "grad_norm": 0.6436976790428162, |
| "learning_rate": 4.7531821899823925e-05, |
| "loss": 0.0589, |
| "step": 8100 |
| }, |
| { |
| "epoch": 3.5353095030514385, |
| "grad_norm": 0.5558163523674011, |
| "learning_rate": 4.742173105158973e-05, |
| "loss": 0.0704, |
| "step": 8110 |
| }, |
| { |
| "epoch": 3.5396687009590235, |
| "grad_norm": 0.4449492394924164, |
| "learning_rate": 4.731165273471129e-05, |
| "loss": 0.0515, |
| "step": 8120 |
| }, |
| { |
| "epoch": 3.5440278988666085, |
| "grad_norm": 1.0276836156845093, |
| "learning_rate": 4.720158748421057e-05, |
| "loss": 0.074, |
| "step": 8130 |
| }, |
| { |
| "epoch": 3.5483870967741935, |
| "grad_norm": 0.6685786247253418, |
| "learning_rate": 4.709153583504602e-05, |
| "loss": 0.0642, |
| "step": 8140 |
| }, |
| { |
| "epoch": 3.5527462946817785, |
| "grad_norm": 0.7969890832901001, |
| "learning_rate": 4.6981498322110027e-05, |
| "loss": 0.0643, |
| "step": 8150 |
| }, |
| { |
| "epoch": 3.5571054925893635, |
| "grad_norm": 0.8791037797927856, |
| "learning_rate": 4.6871475480226256e-05, |
| "loss": 0.0726, |
| "step": 8160 |
| }, |
| { |
| "epoch": 3.5614646904969485, |
| "grad_norm": 0.6621007919311523, |
| "learning_rate": 4.6761467844147004e-05, |
| "loss": 0.0629, |
| "step": 8170 |
| }, |
| { |
| "epoch": 3.5658238884045335, |
| "grad_norm": 1.726035237312317, |
| "learning_rate": 4.665147594855076e-05, |
| "loss": 0.0568, |
| "step": 8180 |
| }, |
| { |
| "epoch": 3.5701830863121184, |
| "grad_norm": 0.7457987666130066, |
| "learning_rate": 4.654150032803943e-05, |
| "loss": 0.0599, |
| "step": 8190 |
| }, |
| { |
| "epoch": 3.5745422842197034, |
| "grad_norm": 0.4260644018650055, |
| "learning_rate": 4.643154151713588e-05, |
| "loss": 0.0534, |
| "step": 8200 |
| }, |
| { |
| "epoch": 3.5789014821272884, |
| "grad_norm": 0.5488954186439514, |
| "learning_rate": 4.6321600050281225e-05, |
| "loss": 0.0448, |
| "step": 8210 |
| }, |
| { |
| "epoch": 3.583260680034874, |
| "grad_norm": 0.7686980962753296, |
| "learning_rate": 4.6211676461832264e-05, |
| "loss": 0.055, |
| "step": 8220 |
| }, |
| { |
| "epoch": 3.5876198779424584, |
| "grad_norm": 2.9167089462280273, |
| "learning_rate": 4.610177128605899e-05, |
| "loss": 0.0673, |
| "step": 8230 |
| }, |
| { |
| "epoch": 3.591979075850044, |
| "grad_norm": 0.48421207070350647, |
| "learning_rate": 4.599188505714184e-05, |
| "loss": 0.0573, |
| "step": 8240 |
| }, |
| { |
| "epoch": 3.5963382737576284, |
| "grad_norm": 0.742896318435669, |
| "learning_rate": 4.588201830916912e-05, |
| "loss": 0.0541, |
| "step": 8250 |
| }, |
| { |
| "epoch": 3.600697471665214, |
| "grad_norm": 0.7824766635894775, |
| "learning_rate": 4.577217157613456e-05, |
| "loss": 0.0538, |
| "step": 8260 |
| }, |
| { |
| "epoch": 3.6050566695727984, |
| "grad_norm": 2.995638608932495, |
| "learning_rate": 4.566234539193452e-05, |
| "loss": 0.0522, |
| "step": 8270 |
| }, |
| { |
| "epoch": 3.609415867480384, |
| "grad_norm": 0.5708383917808533, |
| "learning_rate": 4.555254029036555e-05, |
| "loss": 0.0701, |
| "step": 8280 |
| }, |
| { |
| "epoch": 3.6137750653879688, |
| "grad_norm": 15.95019245147705, |
| "learning_rate": 4.544275680512165e-05, |
| "loss": 0.0678, |
| "step": 8290 |
| }, |
| { |
| "epoch": 3.6181342632955538, |
| "grad_norm": 0.5620335936546326, |
| "learning_rate": 4.5332995469791836e-05, |
| "loss": 0.0822, |
| "step": 8300 |
| }, |
| { |
| "epoch": 3.6224934612031388, |
| "grad_norm": 8.422737121582031, |
| "learning_rate": 4.522325681785744e-05, |
| "loss": 0.055, |
| "step": 8310 |
| }, |
| { |
| "epoch": 3.6268526591107237, |
| "grad_norm": 0.734219491481781, |
| "learning_rate": 4.511354138268952e-05, |
| "loss": 0.0472, |
| "step": 8320 |
| }, |
| { |
| "epoch": 3.6312118570183087, |
| "grad_norm": 2.058011293411255, |
| "learning_rate": 4.50038496975463e-05, |
| "loss": 0.055, |
| "step": 8330 |
| }, |
| { |
| "epoch": 3.6355710549258937, |
| "grad_norm": 0.9331876039505005, |
| "learning_rate": 4.489418229557063e-05, |
| "loss": 0.0454, |
| "step": 8340 |
| }, |
| { |
| "epoch": 3.6399302528334787, |
| "grad_norm": 0.7415900826454163, |
| "learning_rate": 4.478453970978722e-05, |
| "loss": 0.0605, |
| "step": 8350 |
| }, |
| { |
| "epoch": 3.6442894507410637, |
| "grad_norm": 0.7572534084320068, |
| "learning_rate": 4.4674922473100286e-05, |
| "loss": 0.0568, |
| "step": 8360 |
| }, |
| { |
| "epoch": 3.6486486486486487, |
| "grad_norm": 0.7530124187469482, |
| "learning_rate": 4.4565331118290756e-05, |
| "loss": 0.0499, |
| "step": 8370 |
| }, |
| { |
| "epoch": 3.6530078465562337, |
| "grad_norm": 1.116607666015625, |
| "learning_rate": 4.4455766178013775e-05, |
| "loss": 0.0596, |
| "step": 8380 |
| }, |
| { |
| "epoch": 3.6573670444638187, |
| "grad_norm": 0.5457643270492554, |
| "learning_rate": 4.434622818479615e-05, |
| "loss": 0.0648, |
| "step": 8390 |
| }, |
| { |
| "epoch": 3.6617262423714037, |
| "grad_norm": 0.6090409755706787, |
| "learning_rate": 4.4236717671033646e-05, |
| "loss": 0.0584, |
| "step": 8400 |
| }, |
| { |
| "epoch": 3.6660854402789886, |
| "grad_norm": 0.4845934212207794, |
| "learning_rate": 4.412723516898853e-05, |
| "loss": 0.0548, |
| "step": 8410 |
| }, |
| { |
| "epoch": 3.6704446381865736, |
| "grad_norm": 0.7643135786056519, |
| "learning_rate": 4.40177812107869e-05, |
| "loss": 0.0488, |
| "step": 8420 |
| }, |
| { |
| "epoch": 3.6748038360941586, |
| "grad_norm": 0.6320275068283081, |
| "learning_rate": 4.390835632841606e-05, |
| "loss": 0.0501, |
| "step": 8430 |
| }, |
| { |
| "epoch": 3.6791630340017436, |
| "grad_norm": 1.2610498666763306, |
| "learning_rate": 4.3798961053722115e-05, |
| "loss": 0.0558, |
| "step": 8440 |
| }, |
| { |
| "epoch": 3.6835222319093286, |
| "grad_norm": 0.3895055651664734, |
| "learning_rate": 4.368959591840718e-05, |
| "loss": 0.0478, |
| "step": 8450 |
| }, |
| { |
| "epoch": 3.6878814298169136, |
| "grad_norm": 0.45858561992645264, |
| "learning_rate": 4.3580261454026865e-05, |
| "loss": 0.0459, |
| "step": 8460 |
| }, |
| { |
| "epoch": 3.6922406277244986, |
| "grad_norm": 1.6598986387252808, |
| "learning_rate": 4.3470958191987786e-05, |
| "loss": 0.051, |
| "step": 8470 |
| }, |
| { |
| "epoch": 3.6965998256320836, |
| "grad_norm": 0.7669274806976318, |
| "learning_rate": 4.336168666354484e-05, |
| "loss": 0.0455, |
| "step": 8480 |
| }, |
| { |
| "epoch": 3.7009590235396685, |
| "grad_norm": 1.024935007095337, |
| "learning_rate": 4.325244739979873e-05, |
| "loss": 0.0618, |
| "step": 8490 |
| }, |
| { |
| "epoch": 3.7053182214472535, |
| "grad_norm": 0.9452399611473083, |
| "learning_rate": 4.314324093169332e-05, |
| "loss": 0.0701, |
| "step": 8500 |
| }, |
| { |
| "epoch": 3.709677419354839, |
| "grad_norm": 0.4857301414012909, |
| "learning_rate": 4.303406779001302e-05, |
| "loss": 0.054, |
| "step": 8510 |
| }, |
| { |
| "epoch": 3.7140366172624235, |
| "grad_norm": 0.5200791954994202, |
| "learning_rate": 4.292492850538038e-05, |
| "loss": 0.0461, |
| "step": 8520 |
| }, |
| { |
| "epoch": 3.718395815170009, |
| "grad_norm": 8.943376541137695, |
| "learning_rate": 4.28158236082533e-05, |
| "loss": 0.0594, |
| "step": 8530 |
| }, |
| { |
| "epoch": 3.7227550130775935, |
| "grad_norm": 0.6628287434577942, |
| "learning_rate": 4.270675362892256e-05, |
| "loss": 0.0525, |
| "step": 8540 |
| }, |
| { |
| "epoch": 3.727114210985179, |
| "grad_norm": 0.5522739291191101, |
| "learning_rate": 4.2597719097509246e-05, |
| "loss": 0.0456, |
| "step": 8550 |
| }, |
| { |
| "epoch": 3.7314734088927635, |
| "grad_norm": 0.5528436899185181, |
| "learning_rate": 4.2488720543962146e-05, |
| "loss": 0.0628, |
| "step": 8560 |
| }, |
| { |
| "epoch": 3.735832606800349, |
| "grad_norm": 0.5776464939117432, |
| "learning_rate": 4.23797584980552e-05, |
| "loss": 0.0832, |
| "step": 8570 |
| }, |
| { |
| "epoch": 3.740191804707934, |
| "grad_norm": 0.8870580196380615, |
| "learning_rate": 4.227083348938486e-05, |
| "loss": 0.0701, |
| "step": 8580 |
| }, |
| { |
| "epoch": 3.744551002615519, |
| "grad_norm": 0.5437199473381042, |
| "learning_rate": 4.2161946047367586e-05, |
| "loss": 0.055, |
| "step": 8590 |
| }, |
| { |
| "epoch": 3.748910200523104, |
| "grad_norm": 0.4483864903450012, |
| "learning_rate": 4.2053096701237294e-05, |
| "loss": 0.0482, |
| "step": 8600 |
| }, |
| { |
| "epoch": 3.753269398430689, |
| "grad_norm": 0.6559789776802063, |
| "learning_rate": 4.1944285980042656e-05, |
| "loss": 0.047, |
| "step": 8610 |
| }, |
| { |
| "epoch": 3.757628596338274, |
| "grad_norm": 0.5018060803413391, |
| "learning_rate": 4.183551441264469e-05, |
| "loss": 0.0577, |
| "step": 8620 |
| }, |
| { |
| "epoch": 3.761987794245859, |
| "grad_norm": 0.6484914422035217, |
| "learning_rate": 4.172678252771408e-05, |
| "loss": 0.0527, |
| "step": 8630 |
| }, |
| { |
| "epoch": 3.766346992153444, |
| "grad_norm": 0.6902757883071899, |
| "learning_rate": 4.16180908537286e-05, |
| "loss": 0.0597, |
| "step": 8640 |
| }, |
| { |
| "epoch": 3.770706190061029, |
| "grad_norm": 0.7869307398796082, |
| "learning_rate": 4.150943991897065e-05, |
| "loss": 0.0563, |
| "step": 8650 |
| }, |
| { |
| "epoch": 3.775065387968614, |
| "grad_norm": 0.6430687308311462, |
| "learning_rate": 4.1400830251524605e-05, |
| "loss": 0.0558, |
| "step": 8660 |
| }, |
| { |
| "epoch": 3.779424585876199, |
| "grad_norm": 0.5719813704490662, |
| "learning_rate": 4.1292262379274215e-05, |
| "loss": 0.0464, |
| "step": 8670 |
| }, |
| { |
| "epoch": 3.7837837837837838, |
| "grad_norm": 0.4877850413322449, |
| "learning_rate": 4.118373682990016e-05, |
| "loss": 0.0411, |
| "step": 8680 |
| }, |
| { |
| "epoch": 3.7881429816913688, |
| "grad_norm": 0.48677220940589905, |
| "learning_rate": 4.107525413087737e-05, |
| "loss": 0.0499, |
| "step": 8690 |
| }, |
| { |
| "epoch": 3.7925021795989537, |
| "grad_norm": 0.2930895984172821, |
| "learning_rate": 4.096681480947252e-05, |
| "loss": 0.0491, |
| "step": 8700 |
| }, |
| { |
| "epoch": 3.7968613775065387, |
| "grad_norm": 0.690412700176239, |
| "learning_rate": 4.085841939274146e-05, |
| "loss": 0.0538, |
| "step": 8710 |
| }, |
| { |
| "epoch": 3.8012205754141237, |
| "grad_norm": 0.8729444146156311, |
| "learning_rate": 4.075006840752662e-05, |
| "loss": 0.0756, |
| "step": 8720 |
| }, |
| { |
| "epoch": 3.8055797733217087, |
| "grad_norm": 0.40643206238746643, |
| "learning_rate": 4.0641762380454515e-05, |
| "loss": 0.0443, |
| "step": 8730 |
| }, |
| { |
| "epoch": 3.8099389712292937, |
| "grad_norm": 0.5322911143302917, |
| "learning_rate": 4.0533501837933134e-05, |
| "loss": 0.0514, |
| "step": 8740 |
| }, |
| { |
| "epoch": 3.8142981691368787, |
| "grad_norm": 0.47203385829925537, |
| "learning_rate": 4.042528730614936e-05, |
| "loss": 0.0549, |
| "step": 8750 |
| }, |
| { |
| "epoch": 3.8186573670444637, |
| "grad_norm": 0.5504468679428101, |
| "learning_rate": 4.0317119311066486e-05, |
| "loss": 0.0564, |
| "step": 8760 |
| }, |
| { |
| "epoch": 3.8230165649520487, |
| "grad_norm": 0.5727183818817139, |
| "learning_rate": 4.02089983784216e-05, |
| "loss": 0.0543, |
| "step": 8770 |
| }, |
| { |
| "epoch": 3.827375762859634, |
| "grad_norm": 0.6335930824279785, |
| "learning_rate": 4.010092503372309e-05, |
| "loss": 0.0659, |
| "step": 8780 |
| }, |
| { |
| "epoch": 3.8317349607672186, |
| "grad_norm": 0.5915783047676086, |
| "learning_rate": 3.999289980224797e-05, |
| "loss": 0.0499, |
| "step": 8790 |
| }, |
| { |
| "epoch": 3.836094158674804, |
| "grad_norm": 0.4834582209587097, |
| "learning_rate": 3.9884923209039455e-05, |
| "loss": 0.0393, |
| "step": 8800 |
| }, |
| { |
| "epoch": 3.8404533565823886, |
| "grad_norm": 1.361459732055664, |
| "learning_rate": 3.977699577890439e-05, |
| "loss": 0.0428, |
| "step": 8810 |
| }, |
| { |
| "epoch": 3.844812554489974, |
| "grad_norm": 1.9913321733474731, |
| "learning_rate": 3.96691180364106e-05, |
| "loss": 0.0473, |
| "step": 8820 |
| }, |
| { |
| "epoch": 3.8491717523975586, |
| "grad_norm": 0.4347272217273712, |
| "learning_rate": 3.956129050588446e-05, |
| "loss": 0.0465, |
| "step": 8830 |
| }, |
| { |
| "epoch": 3.853530950305144, |
| "grad_norm": 0.38418328762054443, |
| "learning_rate": 3.9453513711408275e-05, |
| "loss": 0.0448, |
| "step": 8840 |
| }, |
| { |
| "epoch": 3.857890148212729, |
| "grad_norm": 0.6460165977478027, |
| "learning_rate": 3.934578817681774e-05, |
| "loss": 0.0431, |
| "step": 8850 |
| }, |
| { |
| "epoch": 3.862249346120314, |
| "grad_norm": 0.5161862969398499, |
| "learning_rate": 3.9238114425699465e-05, |
| "loss": 0.0451, |
| "step": 8860 |
| }, |
| { |
| "epoch": 3.866608544027899, |
| "grad_norm": 0.6148392558097839, |
| "learning_rate": 3.91304929813883e-05, |
| "loss": 0.0453, |
| "step": 8870 |
| }, |
| { |
| "epoch": 3.870967741935484, |
| "grad_norm": 0.794267475605011, |
| "learning_rate": 3.902292436696489e-05, |
| "loss": 0.0582, |
| "step": 8880 |
| }, |
| { |
| "epoch": 3.875326939843069, |
| "grad_norm": 3.6313283443450928, |
| "learning_rate": 3.891540910525316e-05, |
| "loss": 0.0528, |
| "step": 8890 |
| }, |
| { |
| "epoch": 3.879686137750654, |
| "grad_norm": 7.387906074523926, |
| "learning_rate": 3.8807947718817624e-05, |
| "loss": 0.0534, |
| "step": 8900 |
| }, |
| { |
| "epoch": 3.884045335658239, |
| "grad_norm": 0.6750633716583252, |
| "learning_rate": 3.870054072996103e-05, |
| "loss": 0.0433, |
| "step": 8910 |
| }, |
| { |
| "epoch": 3.888404533565824, |
| "grad_norm": 0.7969902753829956, |
| "learning_rate": 3.859318866072168e-05, |
| "loss": 0.0602, |
| "step": 8920 |
| }, |
| { |
| "epoch": 3.892763731473409, |
| "grad_norm": 0.44962191581726074, |
| "learning_rate": 3.8485892032870965e-05, |
| "loss": 0.0485, |
| "step": 8930 |
| }, |
| { |
| "epoch": 3.897122929380994, |
| "grad_norm": 0.45624884963035583, |
| "learning_rate": 3.83786513679108e-05, |
| "loss": 0.0482, |
| "step": 8940 |
| }, |
| { |
| "epoch": 3.901482127288579, |
| "grad_norm": 4.285119533538818, |
| "learning_rate": 3.8271467187071134e-05, |
| "loss": 0.047, |
| "step": 8950 |
| }, |
| { |
| "epoch": 3.905841325196164, |
| "grad_norm": 0.7174338698387146, |
| "learning_rate": 3.816434001130732e-05, |
| "loss": 0.0465, |
| "step": 8960 |
| }, |
| { |
| "epoch": 3.910200523103749, |
| "grad_norm": 2.061223268508911, |
| "learning_rate": 3.8057270361297706e-05, |
| "loss": 0.044, |
| "step": 8970 |
| }, |
| { |
| "epoch": 3.914559721011334, |
| "grad_norm": 0.5447198152542114, |
| "learning_rate": 3.7950258757440985e-05, |
| "loss": 0.0481, |
| "step": 8980 |
| }, |
| { |
| "epoch": 3.918918918918919, |
| "grad_norm": 0.40096163749694824, |
| "learning_rate": 3.78433057198538e-05, |
| "loss": 0.0627, |
| "step": 8990 |
| }, |
| { |
| "epoch": 3.923278116826504, |
| "grad_norm": 0.5488551259040833, |
| "learning_rate": 3.773641176836807e-05, |
| "loss": 0.0513, |
| "step": 9000 |
| }, |
| { |
| "epoch": 3.927637314734089, |
| "grad_norm": 0.5273767709732056, |
| "learning_rate": 3.7629577422528555e-05, |
| "loss": 0.0628, |
| "step": 9010 |
| }, |
| { |
| "epoch": 3.931996512641674, |
| "grad_norm": 1.203155755996704, |
| "learning_rate": 3.7522803201590325e-05, |
| "loss": 0.043, |
| "step": 9020 |
| }, |
| { |
| "epoch": 3.936355710549259, |
| "grad_norm": 0.4476049244403839, |
| "learning_rate": 3.741608962451621e-05, |
| "loss": 0.0557, |
| "step": 9030 |
| }, |
| { |
| "epoch": 3.940714908456844, |
| "grad_norm": 0.6361905336380005, |
| "learning_rate": 3.730943720997427e-05, |
| "loss": 0.0533, |
| "step": 9040 |
| }, |
| { |
| "epoch": 3.945074106364429, |
| "grad_norm": 0.361251562833786, |
| "learning_rate": 3.720284647633532e-05, |
| "loss": 0.0415, |
| "step": 9050 |
| }, |
| { |
| "epoch": 3.949433304272014, |
| "grad_norm": 1.1388883590698242, |
| "learning_rate": 3.7096317941670365e-05, |
| "loss": 0.0549, |
| "step": 9060 |
| }, |
| { |
| "epoch": 3.953792502179599, |
| "grad_norm": 0.37069207429885864, |
| "learning_rate": 3.698985212374814e-05, |
| "loss": 0.0386, |
| "step": 9070 |
| }, |
| { |
| "epoch": 3.9581517000871838, |
| "grad_norm": 0.4507943391799927, |
| "learning_rate": 3.6883449540032477e-05, |
| "loss": 0.0491, |
| "step": 9080 |
| }, |
| { |
| "epoch": 3.962510897994769, |
| "grad_norm": 0.32320085167884827, |
| "learning_rate": 3.6777110707679905e-05, |
| "loss": 0.0499, |
| "step": 9090 |
| }, |
| { |
| "epoch": 3.9668700959023537, |
| "grad_norm": 1.666203260421753, |
| "learning_rate": 3.667083614353715e-05, |
| "loss": 0.0551, |
| "step": 9100 |
| }, |
| { |
| "epoch": 3.971229293809939, |
| "grad_norm": 0.42759859561920166, |
| "learning_rate": 3.6564626364138465e-05, |
| "loss": 0.0533, |
| "step": 9110 |
| }, |
| { |
| "epoch": 3.9755884917175237, |
| "grad_norm": 0.539943516254425, |
| "learning_rate": 3.645848188570331e-05, |
| "loss": 0.0514, |
| "step": 9120 |
| }, |
| { |
| "epoch": 3.979947689625109, |
| "grad_norm": 0.7529284954071045, |
| "learning_rate": 3.635240322413374e-05, |
| "loss": 0.052, |
| "step": 9130 |
| }, |
| { |
| "epoch": 3.984306887532694, |
| "grad_norm": 0.6702316999435425, |
| "learning_rate": 3.624639089501187e-05, |
| "loss": 0.0387, |
| "step": 9140 |
| }, |
| { |
| "epoch": 3.988666085440279, |
| "grad_norm": 0.5268199443817139, |
| "learning_rate": 3.614044541359749e-05, |
| "loss": 0.0581, |
| "step": 9150 |
| }, |
| { |
| "epoch": 3.993025283347864, |
| "grad_norm": 1.170999526977539, |
| "learning_rate": 3.603456729482541e-05, |
| "loss": 0.0461, |
| "step": 9160 |
| }, |
| { |
| "epoch": 3.997384481255449, |
| "grad_norm": 0.3432202935218811, |
| "learning_rate": 3.5928757053303055e-05, |
| "loss": 0.0492, |
| "step": 9170 |
| }, |
| { |
| "epoch": 4.001743679163034, |
| "grad_norm": 0.6855699419975281, |
| "learning_rate": 3.5823015203308e-05, |
| "loss": 0.0454, |
| "step": 9180 |
| }, |
| { |
| "epoch": 4.006102877070619, |
| "grad_norm": 0.5655994415283203, |
| "learning_rate": 3.57173422587853e-05, |
| "loss": 0.0381, |
| "step": 9190 |
| }, |
| { |
| "epoch": 4.010462074978204, |
| "grad_norm": 0.46553653478622437, |
| "learning_rate": 3.561173873334522e-05, |
| "loss": 0.056, |
| "step": 9200 |
| }, |
| { |
| "epoch": 4.014821272885789, |
| "grad_norm": 0.43640783429145813, |
| "learning_rate": 3.550620514026056e-05, |
| "loss": 0.0501, |
| "step": 9210 |
| }, |
| { |
| "epoch": 4.019180470793374, |
| "grad_norm": 0.3680025041103363, |
| "learning_rate": 3.54007419924642e-05, |
| "loss": 0.0459, |
| "step": 9220 |
| }, |
| { |
| "epoch": 4.023539668700959, |
| "grad_norm": 1.250110149383545, |
| "learning_rate": 3.52953498025467e-05, |
| "loss": 0.0448, |
| "step": 9230 |
| }, |
| { |
| "epoch": 4.0278988666085445, |
| "grad_norm": 0.4605693221092224, |
| "learning_rate": 3.519002908275368e-05, |
| "loss": 0.0514, |
| "step": 9240 |
| }, |
| { |
| "epoch": 4.032258064516129, |
| "grad_norm": 0.7519360780715942, |
| "learning_rate": 3.508478034498339e-05, |
| "loss": 0.0384, |
| "step": 9250 |
| }, |
| { |
| "epoch": 4.036617262423714, |
| "grad_norm": 0.43192410469055176, |
| "learning_rate": 3.497960410078427e-05, |
| "loss": 0.044, |
| "step": 9260 |
| }, |
| { |
| "epoch": 4.040976460331299, |
| "grad_norm": 1.0785924196243286, |
| "learning_rate": 3.487450086135236e-05, |
| "loss": 0.0537, |
| "step": 9270 |
| }, |
| { |
| "epoch": 4.045335658238884, |
| "grad_norm": 0.6490569114685059, |
| "learning_rate": 3.476947113752891e-05, |
| "loss": 0.049, |
| "step": 9280 |
| }, |
| { |
| "epoch": 4.049694856146469, |
| "grad_norm": 0.8670549988746643, |
| "learning_rate": 3.4664515439797823e-05, |
| "loss": 0.0504, |
| "step": 9290 |
| }, |
| { |
| "epoch": 4.054054054054054, |
| "grad_norm": 0.39166024327278137, |
| "learning_rate": 3.45596342782832e-05, |
| "loss": 0.054, |
| "step": 9300 |
| }, |
| { |
| "epoch": 4.058413251961639, |
| "grad_norm": 0.5605502128601074, |
| "learning_rate": 3.4454828162746936e-05, |
| "loss": 0.047, |
| "step": 9310 |
| }, |
| { |
| "epoch": 4.062772449869224, |
| "grad_norm": 0.33376985788345337, |
| "learning_rate": 3.435009760258608e-05, |
| "loss": 0.064, |
| "step": 9320 |
| }, |
| { |
| "epoch": 4.067131647776809, |
| "grad_norm": 1.2488508224487305, |
| "learning_rate": 3.424544310683057e-05, |
| "loss": 0.0435, |
| "step": 9330 |
| }, |
| { |
| "epoch": 4.071490845684394, |
| "grad_norm": 0.9323531985282898, |
| "learning_rate": 3.41408651841405e-05, |
| "loss": 0.0489, |
| "step": 9340 |
| }, |
| { |
| "epoch": 4.075850043591979, |
| "grad_norm": 0.3177700340747833, |
| "learning_rate": 3.403636434280388e-05, |
| "loss": 0.0466, |
| "step": 9350 |
| }, |
| { |
| "epoch": 4.080209241499564, |
| "grad_norm": 0.5773137807846069, |
| "learning_rate": 3.393194109073411e-05, |
| "loss": 0.0456, |
| "step": 9360 |
| }, |
| { |
| "epoch": 4.084568439407149, |
| "grad_norm": 1.1703490018844604, |
| "learning_rate": 3.3827595935467376e-05, |
| "loss": 0.0647, |
| "step": 9370 |
| }, |
| { |
| "epoch": 4.088927637314734, |
| "grad_norm": 0.34152188897132874, |
| "learning_rate": 3.3723329384160344e-05, |
| "loss": 0.0459, |
| "step": 9380 |
| }, |
| { |
| "epoch": 4.093286835222319, |
| "grad_norm": 0.45289939641952515, |
| "learning_rate": 3.3619141943587646e-05, |
| "loss": 0.0457, |
| "step": 9390 |
| }, |
| { |
| "epoch": 4.097646033129904, |
| "grad_norm": 0.6022029519081116, |
| "learning_rate": 3.351503412013935e-05, |
| "loss": 0.0448, |
| "step": 9400 |
| }, |
| { |
| "epoch": 4.102005231037489, |
| "grad_norm": 0.4732722342014313, |
| "learning_rate": 3.341100641981863e-05, |
| "loss": 0.0612, |
| "step": 9410 |
| }, |
| { |
| "epoch": 4.106364428945074, |
| "grad_norm": 0.4304181933403015, |
| "learning_rate": 3.330705934823919e-05, |
| "loss": 0.0373, |
| "step": 9420 |
| }, |
| { |
| "epoch": 4.110723626852659, |
| "grad_norm": 0.37233036756515503, |
| "learning_rate": 3.3203193410622804e-05, |
| "loss": 0.0435, |
| "step": 9430 |
| }, |
| { |
| "epoch": 4.115082824760244, |
| "grad_norm": 0.3829936683177948, |
| "learning_rate": 3.309940911179701e-05, |
| "loss": 0.0474, |
| "step": 9440 |
| }, |
| { |
| "epoch": 4.119442022667829, |
| "grad_norm": 0.5989209413528442, |
| "learning_rate": 3.2995706956192465e-05, |
| "loss": 0.0397, |
| "step": 9450 |
| }, |
| { |
| "epoch": 4.123801220575414, |
| "grad_norm": 0.47757670283317566, |
| "learning_rate": 3.289208744784059e-05, |
| "loss": 0.0401, |
| "step": 9460 |
| }, |
| { |
| "epoch": 4.128160418482999, |
| "grad_norm": 0.8434343338012695, |
| "learning_rate": 3.2788551090371164e-05, |
| "loss": 0.0575, |
| "step": 9470 |
| }, |
| { |
| "epoch": 4.132519616390584, |
| "grad_norm": 0.693182110786438, |
| "learning_rate": 3.268509838700974e-05, |
| "loss": 0.0429, |
| "step": 9480 |
| }, |
| { |
| "epoch": 4.136878814298169, |
| "grad_norm": 0.5965169668197632, |
| "learning_rate": 3.258172984057535e-05, |
| "loss": 0.0432, |
| "step": 9490 |
| }, |
| { |
| "epoch": 4.141238012205754, |
| "grad_norm": 0.7130163311958313, |
| "learning_rate": 3.247844595347798e-05, |
| "loss": 0.0462, |
| "step": 9500 |
| }, |
| { |
| "epoch": 4.145597210113339, |
| "grad_norm": 0.4185994565486908, |
| "learning_rate": 3.2375247227716077e-05, |
| "loss": 0.0473, |
| "step": 9510 |
| }, |
| { |
| "epoch": 4.149956408020924, |
| "grad_norm": 0.3187268376350403, |
| "learning_rate": 3.2272134164874264e-05, |
| "loss": 0.0418, |
| "step": 9520 |
| }, |
| { |
| "epoch": 4.15431560592851, |
| "grad_norm": 0.3435496985912323, |
| "learning_rate": 3.216910726612073e-05, |
| "loss": 0.0472, |
| "step": 9530 |
| }, |
| { |
| "epoch": 4.158674803836094, |
| "grad_norm": 0.7399494051933289, |
| "learning_rate": 3.2066167032204956e-05, |
| "loss": 0.0512, |
| "step": 9540 |
| }, |
| { |
| "epoch": 4.1630340017436795, |
| "grad_norm": 0.3519555926322937, |
| "learning_rate": 3.196331396345512e-05, |
| "loss": 0.0412, |
| "step": 9550 |
| }, |
| { |
| "epoch": 4.167393199651264, |
| "grad_norm": 0.33296439051628113, |
| "learning_rate": 3.186054855977577e-05, |
| "loss": 0.0419, |
| "step": 9560 |
| }, |
| { |
| "epoch": 4.1717523975588495, |
| "grad_norm": 0.8820390701293945, |
| "learning_rate": 3.175787132064542e-05, |
| "loss": 0.0541, |
| "step": 9570 |
| }, |
| { |
| "epoch": 4.176111595466434, |
| "grad_norm": 0.4657217860221863, |
| "learning_rate": 3.165528274511397e-05, |
| "loss": 0.0496, |
| "step": 9580 |
| }, |
| { |
| "epoch": 4.1804707933740195, |
| "grad_norm": 0.8743098974227905, |
| "learning_rate": 3.155278333180047e-05, |
| "loss": 0.0475, |
| "step": 9590 |
| }, |
| { |
| "epoch": 4.184829991281604, |
| "grad_norm": 0.4864312410354614, |
| "learning_rate": 3.14503735788906e-05, |
| "loss": 0.0442, |
| "step": 9600 |
| }, |
| { |
| "epoch": 4.1891891891891895, |
| "grad_norm": 0.3236112892627716, |
| "learning_rate": 3.134805398413419e-05, |
| "loss": 0.0395, |
| "step": 9610 |
| }, |
| { |
| "epoch": 4.193548387096774, |
| "grad_norm": 0.4888274669647217, |
| "learning_rate": 3.1245825044842954e-05, |
| "loss": 0.0617, |
| "step": 9620 |
| }, |
| { |
| "epoch": 4.1979075850043595, |
| "grad_norm": 0.4416561722755432, |
| "learning_rate": 3.114368725788791e-05, |
| "loss": 0.0381, |
| "step": 9630 |
| }, |
| { |
| "epoch": 4.202266782911944, |
| "grad_norm": 0.4820091724395752, |
| "learning_rate": 3.1041641119697075e-05, |
| "loss": 0.0398, |
| "step": 9640 |
| }, |
| { |
| "epoch": 4.206625980819529, |
| "grad_norm": 0.6689565181732178, |
| "learning_rate": 3.093968712625306e-05, |
| "loss": 0.0466, |
| "step": 9650 |
| }, |
| { |
| "epoch": 4.210985178727114, |
| "grad_norm": 0.627013623714447, |
| "learning_rate": 3.0837825773090535e-05, |
| "loss": 0.044, |
| "step": 9660 |
| }, |
| { |
| "epoch": 4.215344376634699, |
| "grad_norm": 0.7483475804328918, |
| "learning_rate": 3.073605755529395e-05, |
| "loss": 0.0453, |
| "step": 9670 |
| }, |
| { |
| "epoch": 4.219703574542284, |
| "grad_norm": 0.7025408148765564, |
| "learning_rate": 3.063438296749511e-05, |
| "loss": 0.0443, |
| "step": 9680 |
| }, |
| { |
| "epoch": 4.224062772449869, |
| "grad_norm": 0.30035483837127686, |
| "learning_rate": 3.053280250387067e-05, |
| "loss": 0.0405, |
| "step": 9690 |
| }, |
| { |
| "epoch": 4.228421970357454, |
| "grad_norm": 0.7182040810585022, |
| "learning_rate": 3.043131665813988e-05, |
| "loss": 0.0364, |
| "step": 9700 |
| }, |
| { |
| "epoch": 4.232781168265039, |
| "grad_norm": 0.5624287724494934, |
| "learning_rate": 3.0329925923562073e-05, |
| "loss": 0.0459, |
| "step": 9710 |
| }, |
| { |
| "epoch": 4.237140366172624, |
| "grad_norm": 0.7536492347717285, |
| "learning_rate": 3.0228630792934277e-05, |
| "loss": 0.0462, |
| "step": 9720 |
| }, |
| { |
| "epoch": 4.241499564080209, |
| "grad_norm": 0.5312374234199524, |
| "learning_rate": 3.0127431758588918e-05, |
| "loss": 0.059, |
| "step": 9730 |
| }, |
| { |
| "epoch": 4.245858761987794, |
| "grad_norm": 0.2901996672153473, |
| "learning_rate": 3.002632931239133e-05, |
| "loss": 0.0353, |
| "step": 9740 |
| }, |
| { |
| "epoch": 4.250217959895379, |
| "grad_norm": 0.7586061954498291, |
| "learning_rate": 2.992532394573735e-05, |
| "loss": 0.0384, |
| "step": 9750 |
| }, |
| { |
| "epoch": 4.254577157802964, |
| "grad_norm": 0.9470781087875366, |
| "learning_rate": 2.982441614955105e-05, |
| "loss": 0.0458, |
| "step": 9760 |
| }, |
| { |
| "epoch": 4.258936355710549, |
| "grad_norm": 0.4688419997692108, |
| "learning_rate": 2.972360641428218e-05, |
| "loss": 0.0422, |
| "step": 9770 |
| }, |
| { |
| "epoch": 4.263295553618134, |
| "grad_norm": 1.2093769311904907, |
| "learning_rate": 2.9622895229903973e-05, |
| "loss": 0.043, |
| "step": 9780 |
| }, |
| { |
| "epoch": 4.267654751525719, |
| "grad_norm": 0.4565201699733734, |
| "learning_rate": 2.9522283085910612e-05, |
| "loss": 0.0442, |
| "step": 9790 |
| }, |
| { |
| "epoch": 4.272013949433305, |
| "grad_norm": 0.7303440570831299, |
| "learning_rate": 2.942177047131489e-05, |
| "loss": 0.0406, |
| "step": 9800 |
| }, |
| { |
| "epoch": 4.276373147340889, |
| "grad_norm": 0.8189641237258911, |
| "learning_rate": 2.9321357874645905e-05, |
| "loss": 0.06, |
| "step": 9810 |
| }, |
| { |
| "epoch": 4.280732345248475, |
| "grad_norm": 0.3686642348766327, |
| "learning_rate": 2.9221045783946577e-05, |
| "loss": 0.0512, |
| "step": 9820 |
| }, |
| { |
| "epoch": 4.285091543156059, |
| "grad_norm": 1.4388134479522705, |
| "learning_rate": 2.9120834686771394e-05, |
| "loss": 0.0465, |
| "step": 9830 |
| }, |
| { |
| "epoch": 4.289450741063645, |
| "grad_norm": 0.3605181574821472, |
| "learning_rate": 2.902072507018392e-05, |
| "loss": 0.0615, |
| "step": 9840 |
| }, |
| { |
| "epoch": 4.293809938971229, |
| "grad_norm": 0.8981125354766846, |
| "learning_rate": 2.892071742075446e-05, |
| "loss": 0.0469, |
| "step": 9850 |
| }, |
| { |
| "epoch": 4.298169136878815, |
| "grad_norm": 0.336178183555603, |
| "learning_rate": 2.8820812224557812e-05, |
| "loss": 0.0364, |
| "step": 9860 |
| }, |
| { |
| "epoch": 4.302528334786399, |
| "grad_norm": 0.26270031929016113, |
| "learning_rate": 2.8721009967170764e-05, |
| "loss": 0.0456, |
| "step": 9870 |
| }, |
| { |
| "epoch": 4.306887532693985, |
| "grad_norm": 0.5637022256851196, |
| "learning_rate": 2.8621311133669748e-05, |
| "loss": 0.057, |
| "step": 9880 |
| }, |
| { |
| "epoch": 4.311246730601569, |
| "grad_norm": 0.2882184088230133, |
| "learning_rate": 2.8521716208628595e-05, |
| "loss": 0.0381, |
| "step": 9890 |
| }, |
| { |
| "epoch": 4.315605928509155, |
| "grad_norm": 0.5276246070861816, |
| "learning_rate": 2.8422225676116015e-05, |
| "loss": 0.0402, |
| "step": 9900 |
| }, |
| { |
| "epoch": 4.319965126416739, |
| "grad_norm": 0.37984946370124817, |
| "learning_rate": 2.832284001969342e-05, |
| "loss": 0.0442, |
| "step": 9910 |
| }, |
| { |
| "epoch": 4.324324324324325, |
| "grad_norm": 0.600214421749115, |
| "learning_rate": 2.8223559722412408e-05, |
| "loss": 0.0531, |
| "step": 9920 |
| }, |
| { |
| "epoch": 4.328683522231909, |
| "grad_norm": 0.2663778066635132, |
| "learning_rate": 2.8124385266812516e-05, |
| "loss": 0.037, |
| "step": 9930 |
| }, |
| { |
| "epoch": 4.3330427201394945, |
| "grad_norm": 0.3363720178604126, |
| "learning_rate": 2.802531713491886e-05, |
| "loss": 0.0467, |
| "step": 9940 |
| }, |
| { |
| "epoch": 4.337401918047079, |
| "grad_norm": 1.2489317655563354, |
| "learning_rate": 2.7926355808239822e-05, |
| "loss": 0.0372, |
| "step": 9950 |
| }, |
| { |
| "epoch": 4.3417611159546645, |
| "grad_norm": 0.7166866660118103, |
| "learning_rate": 2.782750176776458e-05, |
| "loss": 0.0416, |
| "step": 9960 |
| }, |
| { |
| "epoch": 4.346120313862249, |
| "grad_norm": 0.468319296836853, |
| "learning_rate": 2.7728755493960946e-05, |
| "loss": 0.0416, |
| "step": 9970 |
| }, |
| { |
| "epoch": 4.3504795117698345, |
| "grad_norm": 0.6125366687774658, |
| "learning_rate": 2.7630117466772876e-05, |
| "loss": 0.0438, |
| "step": 9980 |
| }, |
| { |
| "epoch": 4.354838709677419, |
| "grad_norm": 0.600077748298645, |
| "learning_rate": 2.7531588165618278e-05, |
| "loss": 0.0431, |
| "step": 9990 |
| }, |
| { |
| "epoch": 4.3591979075850045, |
| "grad_norm": 0.8532693386077881, |
| "learning_rate": 2.7433168069386533e-05, |
| "loss": 0.0435, |
| "step": 10000 |
| }, |
| { |
| "epoch": 4.363557105492589, |
| "grad_norm": 0.5452614426612854, |
| "learning_rate": 2.7334857656436308e-05, |
| "loss": 0.0438, |
| "step": 10010 |
| }, |
| { |
| "epoch": 4.3679163034001744, |
| "grad_norm": 0.5705096125602722, |
| "learning_rate": 2.7236657404593157e-05, |
| "loss": 0.0465, |
| "step": 10020 |
| }, |
| { |
| "epoch": 4.372275501307759, |
| "grad_norm": 0.39495906233787537, |
| "learning_rate": 2.713856779114716e-05, |
| "loss": 0.0378, |
| "step": 10030 |
| }, |
| { |
| "epoch": 4.376634699215344, |
| "grad_norm": 0.5436052083969116, |
| "learning_rate": 2.704058929285074e-05, |
| "loss": 0.0444, |
| "step": 10040 |
| }, |
| { |
| "epoch": 4.380993897122929, |
| "grad_norm": 0.5021957755088806, |
| "learning_rate": 2.6942722385916175e-05, |
| "loss": 0.0427, |
| "step": 10050 |
| }, |
| { |
| "epoch": 4.385353095030514, |
| "grad_norm": 0.48665133118629456, |
| "learning_rate": 2.6844967546013394e-05, |
| "loss": 0.0448, |
| "step": 10060 |
| }, |
| { |
| "epoch": 4.3897122929381, |
| "grad_norm": 0.43248504400253296, |
| "learning_rate": 2.6747325248267673e-05, |
| "loss": 0.0313, |
| "step": 10070 |
| }, |
| { |
| "epoch": 4.394071490845684, |
| "grad_norm": 0.5743213295936584, |
| "learning_rate": 2.664979596725724e-05, |
| "loss": 0.0406, |
| "step": 10080 |
| }, |
| { |
| "epoch": 4.39843068875327, |
| "grad_norm": 0.6996235251426697, |
| "learning_rate": 2.655238017701105e-05, |
| "loss": 0.0445, |
| "step": 10090 |
| }, |
| { |
| "epoch": 4.402789886660854, |
| "grad_norm": 0.2858131229877472, |
| "learning_rate": 2.6455078351006455e-05, |
| "loss": 0.0446, |
| "step": 10100 |
| }, |
| { |
| "epoch": 4.40714908456844, |
| "grad_norm": 0.40710970759391785, |
| "learning_rate": 2.6357890962166866e-05, |
| "loss": 0.0337, |
| "step": 10110 |
| }, |
| { |
| "epoch": 4.411508282476024, |
| "grad_norm": 0.37843987345695496, |
| "learning_rate": 2.6260818482859534e-05, |
| "loss": 0.0404, |
| "step": 10120 |
| }, |
| { |
| "epoch": 4.41586748038361, |
| "grad_norm": 0.6559879183769226, |
| "learning_rate": 2.6163861384893156e-05, |
| "loss": 0.0457, |
| "step": 10130 |
| }, |
| { |
| "epoch": 4.420226678291194, |
| "grad_norm": 0.5004203915596008, |
| "learning_rate": 2.606702013951564e-05, |
| "loss": 0.0401, |
| "step": 10140 |
| }, |
| { |
| "epoch": 4.42458587619878, |
| "grad_norm": 0.6248252391815186, |
| "learning_rate": 2.5970295217411844e-05, |
| "loss": 0.0483, |
| "step": 10150 |
| }, |
| { |
| "epoch": 4.428945074106364, |
| "grad_norm": 0.5556444525718689, |
| "learning_rate": 2.5873687088701236e-05, |
| "loss": 0.0532, |
| "step": 10160 |
| }, |
| { |
| "epoch": 4.43330427201395, |
| "grad_norm": 0.2590063214302063, |
| "learning_rate": 2.5777196222935596e-05, |
| "loss": 0.0404, |
| "step": 10170 |
| }, |
| { |
| "epoch": 4.437663469921534, |
| "grad_norm": 1.113957166671753, |
| "learning_rate": 2.5680823089096807e-05, |
| "loss": 0.0517, |
| "step": 10180 |
| }, |
| { |
| "epoch": 4.44202266782912, |
| "grad_norm": 0.5693178772926331, |
| "learning_rate": 2.558456815559448e-05, |
| "loss": 0.0607, |
| "step": 10190 |
| }, |
| { |
| "epoch": 4.446381865736704, |
| "grad_norm": 0.5899903774261475, |
| "learning_rate": 2.548843189026378e-05, |
| "loss": 0.0371, |
| "step": 10200 |
| }, |
| { |
| "epoch": 4.45074106364429, |
| "grad_norm": 0.5805737972259521, |
| "learning_rate": 2.5392414760363048e-05, |
| "loss": 0.0452, |
| "step": 10210 |
| }, |
| { |
| "epoch": 4.455100261551874, |
| "grad_norm": 0.5417248606681824, |
| "learning_rate": 2.529651723257162e-05, |
| "loss": 0.0408, |
| "step": 10220 |
| }, |
| { |
| "epoch": 4.45945945945946, |
| "grad_norm": 0.5996112823486328, |
| "learning_rate": 2.5200739772987537e-05, |
| "loss": 0.0363, |
| "step": 10230 |
| }, |
| { |
| "epoch": 4.463818657367044, |
| "grad_norm": 1.2667959928512573, |
| "learning_rate": 2.5105082847125184e-05, |
| "loss": 0.0493, |
| "step": 10240 |
| }, |
| { |
| "epoch": 4.46817785527463, |
| "grad_norm": 0.7686100006103516, |
| "learning_rate": 2.5009546919913218e-05, |
| "loss": 0.0465, |
| "step": 10250 |
| }, |
| { |
| "epoch": 4.472537053182214, |
| "grad_norm": 0.509890079498291, |
| "learning_rate": 2.4914132455692098e-05, |
| "loss": 0.0418, |
| "step": 10260 |
| }, |
| { |
| "epoch": 4.4768962510898, |
| "grad_norm": 0.4904661476612091, |
| "learning_rate": 2.4818839918211962e-05, |
| "loss": 0.0433, |
| "step": 10270 |
| }, |
| { |
| "epoch": 4.481255448997384, |
| "grad_norm": 0.4387027621269226, |
| "learning_rate": 2.4723669770630376e-05, |
| "loss": 0.0459, |
| "step": 10280 |
| }, |
| { |
| "epoch": 4.48561464690497, |
| "grad_norm": 0.7030172944068909, |
| "learning_rate": 2.4628622475509972e-05, |
| "loss": 0.0476, |
| "step": 10290 |
| }, |
| { |
| "epoch": 4.489973844812554, |
| "grad_norm": 0.9251930117607117, |
| "learning_rate": 2.4533698494816342e-05, |
| "loss": 0.0427, |
| "step": 10300 |
| }, |
| { |
| "epoch": 4.49433304272014, |
| "grad_norm": 0.49212366342544556, |
| "learning_rate": 2.44388982899157e-05, |
| "loss": 0.0353, |
| "step": 10310 |
| }, |
| { |
| "epoch": 4.498692240627724, |
| "grad_norm": 0.4423346519470215, |
| "learning_rate": 2.4344222321572636e-05, |
| "loss": 0.0425, |
| "step": 10320 |
| }, |
| { |
| "epoch": 4.5030514385353095, |
| "grad_norm": 0.4786710739135742, |
| "learning_rate": 2.4249671049947954e-05, |
| "loss": 0.0409, |
| "step": 10330 |
| }, |
| { |
| "epoch": 4.507410636442895, |
| "grad_norm": 0.6801364421844482, |
| "learning_rate": 2.4155244934596333e-05, |
| "loss": 0.0483, |
| "step": 10340 |
| }, |
| { |
| "epoch": 4.5117698343504795, |
| "grad_norm": 0.39631038904190063, |
| "learning_rate": 2.406094443446416e-05, |
| "loss": 0.0497, |
| "step": 10350 |
| }, |
| { |
| "epoch": 4.516129032258064, |
| "grad_norm": 1.449474811553955, |
| "learning_rate": 2.3966770007887317e-05, |
| "loss": 0.0377, |
| "step": 10360 |
| }, |
| { |
| "epoch": 4.5204882301656495, |
| "grad_norm": 0.757426381111145, |
| "learning_rate": 2.3872722112588903e-05, |
| "loss": 0.0356, |
| "step": 10370 |
| }, |
| { |
| "epoch": 4.524847428073235, |
| "grad_norm": 0.35006773471832275, |
| "learning_rate": 2.3778801205676997e-05, |
| "loss": 0.0426, |
| "step": 10380 |
| }, |
| { |
| "epoch": 4.5292066259808195, |
| "grad_norm": 0.3051522374153137, |
| "learning_rate": 2.3685007743642524e-05, |
| "loss": 0.0389, |
| "step": 10390 |
| }, |
| { |
| "epoch": 4.533565823888405, |
| "grad_norm": 0.4886200726032257, |
| "learning_rate": 2.3591342182356914e-05, |
| "loss": 0.0479, |
| "step": 10400 |
| }, |
| { |
| "epoch": 4.5379250217959894, |
| "grad_norm": 1.0046757459640503, |
| "learning_rate": 2.3497804977070016e-05, |
| "loss": 0.0406, |
| "step": 10410 |
| }, |
| { |
| "epoch": 4.542284219703575, |
| "grad_norm": 0.5701280236244202, |
| "learning_rate": 2.3404396582407777e-05, |
| "loss": 0.0407, |
| "step": 10420 |
| }, |
| { |
| "epoch": 4.546643417611159, |
| "grad_norm": 0.48940399289131165, |
| "learning_rate": 2.331111745237007e-05, |
| "loss": 0.031, |
| "step": 10430 |
| }, |
| { |
| "epoch": 4.551002615518745, |
| "grad_norm": 0.43257051706314087, |
| "learning_rate": 2.3217968040328526e-05, |
| "loss": 0.0361, |
| "step": 10440 |
| }, |
| { |
| "epoch": 4.555361813426329, |
| "grad_norm": 0.5097408294677734, |
| "learning_rate": 2.3124948799024286e-05, |
| "loss": 0.0457, |
| "step": 10450 |
| }, |
| { |
| "epoch": 4.559721011333915, |
| "grad_norm": 0.7661223411560059, |
| "learning_rate": 2.3032060180565828e-05, |
| "loss": 0.0419, |
| "step": 10460 |
| }, |
| { |
| "epoch": 4.564080209241499, |
| "grad_norm": 0.6379186511039734, |
| "learning_rate": 2.2939302636426724e-05, |
| "loss": 0.0355, |
| "step": 10470 |
| }, |
| { |
| "epoch": 4.568439407149085, |
| "grad_norm": 0.4069131910800934, |
| "learning_rate": 2.2846676617443458e-05, |
| "loss": 0.0355, |
| "step": 10480 |
| }, |
| { |
| "epoch": 4.572798605056669, |
| "grad_norm": 0.6078165173530579, |
| "learning_rate": 2.275418257381332e-05, |
| "loss": 0.0332, |
| "step": 10490 |
| }, |
| { |
| "epoch": 4.577157802964255, |
| "grad_norm": 0.3824521601200104, |
| "learning_rate": 2.2661820955092083e-05, |
| "loss": 0.0327, |
| "step": 10500 |
| }, |
| { |
| "epoch": 4.581517000871839, |
| "grad_norm": 0.5751760601997375, |
| "learning_rate": 2.256959221019193e-05, |
| "loss": 0.0447, |
| "step": 10510 |
| }, |
| { |
| "epoch": 4.585876198779425, |
| "grad_norm": 0.6389051079750061, |
| "learning_rate": 2.2477496787379227e-05, |
| "loss": 0.0282, |
| "step": 10520 |
| }, |
| { |
| "epoch": 4.590235396687009, |
| "grad_norm": 0.3855462372303009, |
| "learning_rate": 2.238553513427229e-05, |
| "loss": 0.043, |
| "step": 10530 |
| }, |
| { |
| "epoch": 4.594594594594595, |
| "grad_norm": 0.4871591031551361, |
| "learning_rate": 2.2293707697839344e-05, |
| "loss": 0.0347, |
| "step": 10540 |
| }, |
| { |
| "epoch": 4.598953792502179, |
| "grad_norm": 0.4838610291481018, |
| "learning_rate": 2.2202014924396214e-05, |
| "loss": 0.0282, |
| "step": 10550 |
| }, |
| { |
| "epoch": 4.603312990409765, |
| "grad_norm": 0.477239727973938, |
| "learning_rate": 2.21104572596042e-05, |
| "loss": 0.0409, |
| "step": 10560 |
| }, |
| { |
| "epoch": 4.607672188317349, |
| "grad_norm": 0.5358338356018066, |
| "learning_rate": 2.2019035148468e-05, |
| "loss": 0.0315, |
| "step": 10570 |
| }, |
| { |
| "epoch": 4.612031386224935, |
| "grad_norm": 2.4292683601379395, |
| "learning_rate": 2.1927749035333374e-05, |
| "loss": 0.0449, |
| "step": 10580 |
| }, |
| { |
| "epoch": 4.616390584132519, |
| "grad_norm": 0.7061522603034973, |
| "learning_rate": 2.1836599363885152e-05, |
| "loss": 0.0497, |
| "step": 10590 |
| }, |
| { |
| "epoch": 4.620749782040105, |
| "grad_norm": 0.3709982633590698, |
| "learning_rate": 2.1745586577144993e-05, |
| "loss": 0.0511, |
| "step": 10600 |
| }, |
| { |
| "epoch": 4.62510897994769, |
| "grad_norm": 1.73948073387146, |
| "learning_rate": 2.1654711117469207e-05, |
| "loss": 0.0544, |
| "step": 10610 |
| }, |
| { |
| "epoch": 4.629468177855275, |
| "grad_norm": 0.4604334235191345, |
| "learning_rate": 2.1563973426546702e-05, |
| "loss": 0.0536, |
| "step": 10620 |
| }, |
| { |
| "epoch": 4.633827375762859, |
| "grad_norm": 0.3564910888671875, |
| "learning_rate": 2.1473373945396728e-05, |
| "loss": 0.0338, |
| "step": 10630 |
| }, |
| { |
| "epoch": 4.638186573670445, |
| "grad_norm": 0.3538748621940613, |
| "learning_rate": 2.138291311436679e-05, |
| "loss": 0.0441, |
| "step": 10640 |
| }, |
| { |
| "epoch": 4.64254577157803, |
| "grad_norm": 0.520347535610199, |
| "learning_rate": 2.1292591373130518e-05, |
| "loss": 0.0448, |
| "step": 10650 |
| }, |
| { |
| "epoch": 4.646904969485615, |
| "grad_norm": 0.4080093502998352, |
| "learning_rate": 2.1202409160685528e-05, |
| "loss": 0.0491, |
| "step": 10660 |
| }, |
| { |
| "epoch": 4.6512641673932, |
| "grad_norm": 0.9783157706260681, |
| "learning_rate": 2.1112366915351228e-05, |
| "loss": 0.0511, |
| "step": 10670 |
| }, |
| { |
| "epoch": 4.655623365300785, |
| "grad_norm": 0.809532880783081, |
| "learning_rate": 2.102246507476679e-05, |
| "loss": 0.0323, |
| "step": 10680 |
| }, |
| { |
| "epoch": 4.65998256320837, |
| "grad_norm": 0.3239237666130066, |
| "learning_rate": 2.09327040758889e-05, |
| "loss": 0.0359, |
| "step": 10690 |
| }, |
| { |
| "epoch": 4.6643417611159546, |
| "grad_norm": 0.6846075057983398, |
| "learning_rate": 2.0843084354989767e-05, |
| "loss": 0.0662, |
| "step": 10700 |
| }, |
| { |
| "epoch": 4.66870095902354, |
| "grad_norm": 0.6443043351173401, |
| "learning_rate": 2.0753606347654892e-05, |
| "loss": 0.0402, |
| "step": 10710 |
| }, |
| { |
| "epoch": 4.6730601569311245, |
| "grad_norm": 0.6246269941329956, |
| "learning_rate": 2.0664270488780985e-05, |
| "loss": 0.0366, |
| "step": 10720 |
| }, |
| { |
| "epoch": 4.67741935483871, |
| "grad_norm": 0.4732634127140045, |
| "learning_rate": 2.0575077212573905e-05, |
| "loss": 0.0357, |
| "step": 10730 |
| }, |
| { |
| "epoch": 4.6817785527462945, |
| "grad_norm": 0.579984188079834, |
| "learning_rate": 2.0486026952546484e-05, |
| "loss": 0.031, |
| "step": 10740 |
| }, |
| { |
| "epoch": 4.68613775065388, |
| "grad_norm": 0.9379808902740479, |
| "learning_rate": 2.0397120141516457e-05, |
| "loss": 0.0402, |
| "step": 10750 |
| }, |
| { |
| "epoch": 4.6904969485614645, |
| "grad_norm": 0.3776817321777344, |
| "learning_rate": 2.0308357211604313e-05, |
| "loss": 0.0386, |
| "step": 10760 |
| }, |
| { |
| "epoch": 4.69485614646905, |
| "grad_norm": 1.3237546682357788, |
| "learning_rate": 2.0219738594231224e-05, |
| "loss": 0.0427, |
| "step": 10770 |
| }, |
| { |
| "epoch": 4.6992153443766345, |
| "grad_norm": 0.5787206888198853, |
| "learning_rate": 2.0131264720116993e-05, |
| "loss": 0.0481, |
| "step": 10780 |
| }, |
| { |
| "epoch": 4.70357454228422, |
| "grad_norm": 0.7264513373374939, |
| "learning_rate": 2.0042936019277853e-05, |
| "loss": 0.0367, |
| "step": 10790 |
| }, |
| { |
| "epoch": 4.707933740191804, |
| "grad_norm": 0.655706524848938, |
| "learning_rate": 1.99547529210245e-05, |
| "loss": 0.0495, |
| "step": 10800 |
| }, |
| { |
| "epoch": 4.71229293809939, |
| "grad_norm": 0.4518389403820038, |
| "learning_rate": 1.9866715853959934e-05, |
| "loss": 0.0332, |
| "step": 10810 |
| }, |
| { |
| "epoch": 4.716652136006974, |
| "grad_norm": 0.4472216069698334, |
| "learning_rate": 1.977882524597734e-05, |
| "loss": 0.0413, |
| "step": 10820 |
| }, |
| { |
| "epoch": 4.72101133391456, |
| "grad_norm": 0.4006964862346649, |
| "learning_rate": 1.969108152425813e-05, |
| "loss": 0.0359, |
| "step": 10830 |
| }, |
| { |
| "epoch": 4.725370531822144, |
| "grad_norm": 0.30864083766937256, |
| "learning_rate": 1.9603485115269744e-05, |
| "loss": 0.048, |
| "step": 10840 |
| }, |
| { |
| "epoch": 4.72972972972973, |
| "grad_norm": 0.9182401895523071, |
| "learning_rate": 1.9516036444763613e-05, |
| "loss": 0.0425, |
| "step": 10850 |
| }, |
| { |
| "epoch": 4.734088927637314, |
| "grad_norm": 0.5361258387565613, |
| "learning_rate": 1.9428735937773173e-05, |
| "loss": 0.0297, |
| "step": 10860 |
| }, |
| { |
| "epoch": 4.7384481255449, |
| "grad_norm": 0.39075401425361633, |
| "learning_rate": 1.9341584018611646e-05, |
| "loss": 0.0348, |
| "step": 10870 |
| }, |
| { |
| "epoch": 4.742807323452485, |
| "grad_norm": 0.28990718722343445, |
| "learning_rate": 1.9254581110870123e-05, |
| "loss": 0.046, |
| "step": 10880 |
| }, |
| { |
| "epoch": 4.74716652136007, |
| "grad_norm": 0.3305884003639221, |
| "learning_rate": 1.916772763741544e-05, |
| "loss": 0.04, |
| "step": 10890 |
| }, |
| { |
| "epoch": 4.751525719267654, |
| "grad_norm": 0.5273476839065552, |
| "learning_rate": 1.908102402038807e-05, |
| "loss": 0.0477, |
| "step": 10900 |
| }, |
| { |
| "epoch": 4.75588491717524, |
| "grad_norm": 0.6094452738761902, |
| "learning_rate": 1.8994470681200204e-05, |
| "loss": 0.0311, |
| "step": 10910 |
| }, |
| { |
| "epoch": 4.760244115082825, |
| "grad_norm": 0.4191102385520935, |
| "learning_rate": 1.8908068040533578e-05, |
| "loss": 0.0368, |
| "step": 10920 |
| }, |
| { |
| "epoch": 4.76460331299041, |
| "grad_norm": 1.5498143434524536, |
| "learning_rate": 1.8821816518337455e-05, |
| "loss": 0.0454, |
| "step": 10930 |
| }, |
| { |
| "epoch": 4.768962510897994, |
| "grad_norm": 0.4996424615383148, |
| "learning_rate": 1.8735716533826663e-05, |
| "loss": 0.0414, |
| "step": 10940 |
| }, |
| { |
| "epoch": 4.77332170880558, |
| "grad_norm": 0.5708060264587402, |
| "learning_rate": 1.8649768505479476e-05, |
| "loss": 0.0416, |
| "step": 10950 |
| }, |
| { |
| "epoch": 4.777680906713165, |
| "grad_norm": 0.5143120884895325, |
| "learning_rate": 1.8563972851035616e-05, |
| "loss": 0.0317, |
| "step": 10960 |
| }, |
| { |
| "epoch": 4.78204010462075, |
| "grad_norm": 0.345091849565506, |
| "learning_rate": 1.847832998749418e-05, |
| "loss": 0.0326, |
| "step": 10970 |
| }, |
| { |
| "epoch": 4.786399302528335, |
| "grad_norm": 0.2509484887123108, |
| "learning_rate": 1.8392840331111644e-05, |
| "loss": 0.0326, |
| "step": 10980 |
| }, |
| { |
| "epoch": 4.79075850043592, |
| "grad_norm": 0.5557523369789124, |
| "learning_rate": 1.830750429739989e-05, |
| "loss": 0.0354, |
| "step": 10990 |
| }, |
| { |
| "epoch": 4.795117698343505, |
| "grad_norm": 0.4494255483150482, |
| "learning_rate": 1.822232230112409e-05, |
| "loss": 0.0312, |
| "step": 11000 |
| }, |
| { |
| "epoch": 4.79947689625109, |
| "grad_norm": 0.706129789352417, |
| "learning_rate": 1.813729475630071e-05, |
| "loss": 0.0421, |
| "step": 11010 |
| }, |
| { |
| "epoch": 4.803836094158675, |
| "grad_norm": 0.2876376211643219, |
| "learning_rate": 1.8052422076195635e-05, |
| "loss": 0.0346, |
| "step": 11020 |
| }, |
| { |
| "epoch": 4.80819529206626, |
| "grad_norm": 0.5017814040184021, |
| "learning_rate": 1.7967704673321918e-05, |
| "loss": 0.0362, |
| "step": 11030 |
| }, |
| { |
| "epoch": 4.812554489973845, |
| "grad_norm": 0.5770791172981262, |
| "learning_rate": 1.7883142959438004e-05, |
| "loss": 0.0349, |
| "step": 11040 |
| }, |
| { |
| "epoch": 4.81691368788143, |
| "grad_norm": 0.3633856773376465, |
| "learning_rate": 1.779873734554558e-05, |
| "loss": 0.0327, |
| "step": 11050 |
| }, |
| { |
| "epoch": 4.821272885789015, |
| "grad_norm": 0.5029147863388062, |
| "learning_rate": 1.771448824188761e-05, |
| "loss": 0.0362, |
| "step": 11060 |
| }, |
| { |
| "epoch": 4.8256320836966, |
| "grad_norm": 0.668860912322998, |
| "learning_rate": 1.763039605794644e-05, |
| "loss": 0.0379, |
| "step": 11070 |
| }, |
| { |
| "epoch": 4.829991281604185, |
| "grad_norm": 0.34870511293411255, |
| "learning_rate": 1.754646120244164e-05, |
| "loss": 0.0462, |
| "step": 11080 |
| }, |
| { |
| "epoch": 4.8343504795117695, |
| "grad_norm": 0.29230761528015137, |
| "learning_rate": 1.7462684083328144e-05, |
| "loss": 0.0334, |
| "step": 11090 |
| }, |
| { |
| "epoch": 4.838709677419355, |
| "grad_norm": 0.3179951012134552, |
| "learning_rate": 1.7379065107794262e-05, |
| "loss": 0.0352, |
| "step": 11100 |
| }, |
| { |
| "epoch": 4.8430688753269395, |
| "grad_norm": 0.4508034884929657, |
| "learning_rate": 1.7295604682259586e-05, |
| "loss": 0.0387, |
| "step": 11110 |
| }, |
| { |
| "epoch": 4.847428073234525, |
| "grad_norm": 0.49519672989845276, |
| "learning_rate": 1.7212303212373175e-05, |
| "loss": 0.0341, |
| "step": 11120 |
| }, |
| { |
| "epoch": 4.8517872711421095, |
| "grad_norm": 0.448363721370697, |
| "learning_rate": 1.712916110301146e-05, |
| "loss": 0.0397, |
| "step": 11130 |
| }, |
| { |
| "epoch": 4.856146469049695, |
| "grad_norm": 0.2779761552810669, |
| "learning_rate": 1.7046178758276298e-05, |
| "loss": 0.0302, |
| "step": 11140 |
| }, |
| { |
| "epoch": 4.8605056669572795, |
| "grad_norm": 1.0302684307098389, |
| "learning_rate": 1.696335658149309e-05, |
| "loss": 0.0253, |
| "step": 11150 |
| }, |
| { |
| "epoch": 4.864864864864865, |
| "grad_norm": 0.31043338775634766, |
| "learning_rate": 1.6880694975208727e-05, |
| "loss": 0.0339, |
| "step": 11160 |
| }, |
| { |
| "epoch": 4.8692240627724495, |
| "grad_norm": 0.8034403324127197, |
| "learning_rate": 1.6798194341189687e-05, |
| "loss": 0.0328, |
| "step": 11170 |
| }, |
| { |
| "epoch": 4.873583260680035, |
| "grad_norm": 0.7642386555671692, |
| "learning_rate": 1.671585508042003e-05, |
| "loss": 0.0342, |
| "step": 11180 |
| }, |
| { |
| "epoch": 4.87794245858762, |
| "grad_norm": 0.23145624995231628, |
| "learning_rate": 1.6633677593099483e-05, |
| "loss": 0.0373, |
| "step": 11190 |
| }, |
| { |
| "epoch": 4.882301656495205, |
| "grad_norm": 0.38365113735198975, |
| "learning_rate": 1.655166227864154e-05, |
| "loss": 0.0372, |
| "step": 11200 |
| }, |
| { |
| "epoch": 4.886660854402789, |
| "grad_norm": 0.6353790163993835, |
| "learning_rate": 1.6469809535671426e-05, |
| "loss": 0.0388, |
| "step": 11210 |
| }, |
| { |
| "epoch": 4.891020052310375, |
| "grad_norm": 0.4335973560810089, |
| "learning_rate": 1.638811976202421e-05, |
| "loss": 0.0359, |
| "step": 11220 |
| }, |
| { |
| "epoch": 4.89537925021796, |
| "grad_norm": 0.9411593675613403, |
| "learning_rate": 1.6306593354742895e-05, |
| "loss": 0.0461, |
| "step": 11230 |
| }, |
| { |
| "epoch": 4.899738448125545, |
| "grad_norm": 0.45522540807724, |
| "learning_rate": 1.6225230710076455e-05, |
| "loss": 0.0335, |
| "step": 11240 |
| }, |
| { |
| "epoch": 4.90409764603313, |
| "grad_norm": 0.5510297417640686, |
| "learning_rate": 1.6144032223477924e-05, |
| "loss": 0.0277, |
| "step": 11250 |
| }, |
| { |
| "epoch": 4.908456843940715, |
| "grad_norm": 0.730690062046051, |
| "learning_rate": 1.606299828960243e-05, |
| "loss": 0.034, |
| "step": 11260 |
| }, |
| { |
| "epoch": 4.9128160418483, |
| "grad_norm": 0.47577011585235596, |
| "learning_rate": 1.5982129302305337e-05, |
| "loss": 0.032, |
| "step": 11270 |
| }, |
| { |
| "epoch": 4.917175239755885, |
| "grad_norm": 0.4926137328147888, |
| "learning_rate": 1.590142565464032e-05, |
| "loss": 0.0396, |
| "step": 11280 |
| }, |
| { |
| "epoch": 4.92153443766347, |
| "grad_norm": 0.3040984570980072, |
| "learning_rate": 1.5820887738857408e-05, |
| "loss": 0.0396, |
| "step": 11290 |
| }, |
| { |
| "epoch": 4.925893635571055, |
| "grad_norm": 0.4636688530445099, |
| "learning_rate": 1.5740515946401134e-05, |
| "loss": 0.0325, |
| "step": 11300 |
| }, |
| { |
| "epoch": 4.93025283347864, |
| "grad_norm": 0.5039488673210144, |
| "learning_rate": 1.5660310667908634e-05, |
| "loss": 0.0361, |
| "step": 11310 |
| }, |
| { |
| "epoch": 4.934612031386225, |
| "grad_norm": 0.5043140053749084, |
| "learning_rate": 1.5580272293207655e-05, |
| "loss": 0.0395, |
| "step": 11320 |
| }, |
| { |
| "epoch": 4.93897122929381, |
| "grad_norm": 0.4170094430446625, |
| "learning_rate": 1.5500401211314796e-05, |
| "loss": 0.0323, |
| "step": 11330 |
| }, |
| { |
| "epoch": 4.943330427201395, |
| "grad_norm": 0.3870650827884674, |
| "learning_rate": 1.542069781043351e-05, |
| "loss": 0.0256, |
| "step": 11340 |
| }, |
| { |
| "epoch": 4.94768962510898, |
| "grad_norm": 0.567021369934082, |
| "learning_rate": 1.534116247795226e-05, |
| "loss": 0.0493, |
| "step": 11350 |
| }, |
| { |
| "epoch": 4.952048823016565, |
| "grad_norm": 0.7827485203742981, |
| "learning_rate": 1.526179560044267e-05, |
| "loss": 0.0308, |
| "step": 11360 |
| }, |
| { |
| "epoch": 4.95640802092415, |
| "grad_norm": 1.4778623580932617, |
| "learning_rate": 1.5182597563657552e-05, |
| "loss": 0.0246, |
| "step": 11370 |
| }, |
| { |
| "epoch": 4.960767218831735, |
| "grad_norm": 0.6621624827384949, |
| "learning_rate": 1.5103568752529135e-05, |
| "loss": 0.0359, |
| "step": 11380 |
| }, |
| { |
| "epoch": 4.96512641673932, |
| "grad_norm": 0.8446053862571716, |
| "learning_rate": 1.5024709551167142e-05, |
| "loss": 0.0351, |
| "step": 11390 |
| }, |
| { |
| "epoch": 4.969485614646905, |
| "grad_norm": 0.9632458090782166, |
| "learning_rate": 1.4946020342856898e-05, |
| "loss": 0.0451, |
| "step": 11400 |
| }, |
| { |
| "epoch": 4.97384481255449, |
| "grad_norm": 0.44982174038887024, |
| "learning_rate": 1.4867501510057546e-05, |
| "loss": 0.0281, |
| "step": 11410 |
| }, |
| { |
| "epoch": 4.978204010462075, |
| "grad_norm": 0.2952253818511963, |
| "learning_rate": 1.4789153434400094e-05, |
| "loss": 0.0435, |
| "step": 11420 |
| }, |
| { |
| "epoch": 4.98256320836966, |
| "grad_norm": 1.1763001680374146, |
| "learning_rate": 1.4710976496685614e-05, |
| "loss": 0.0324, |
| "step": 11430 |
| }, |
| { |
| "epoch": 4.986922406277245, |
| "grad_norm": 0.6411994099617004, |
| "learning_rate": 1.4632971076883406e-05, |
| "loss": 0.0316, |
| "step": 11440 |
| }, |
| { |
| "epoch": 4.99128160418483, |
| "grad_norm": 0.6712291240692139, |
| "learning_rate": 1.4555137554129117e-05, |
| "loss": 0.0378, |
| "step": 11450 |
| }, |
| { |
| "epoch": 4.9956408020924155, |
| "grad_norm": 0.3442004919052124, |
| "learning_rate": 1.4477476306722925e-05, |
| "loss": 0.043, |
| "step": 11460 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.1500236988067627, |
| "learning_rate": 1.439998771212766e-05, |
| "loss": 0.0319, |
| "step": 11470 |
| }, |
| { |
| "epoch": 5.004359197907585, |
| "grad_norm": 0.3608720600605011, |
| "learning_rate": 1.4322672146966982e-05, |
| "loss": 0.0297, |
| "step": 11480 |
| }, |
| { |
| "epoch": 5.00871839581517, |
| "grad_norm": 0.41150960326194763, |
| "learning_rate": 1.4245529987023621e-05, |
| "loss": 0.043, |
| "step": 11490 |
| }, |
| { |
| "epoch": 5.013077593722755, |
| "grad_norm": 1.1648448705673218, |
| "learning_rate": 1.4168561607237436e-05, |
| "loss": 0.0323, |
| "step": 11500 |
| }, |
| { |
| "epoch": 5.01743679163034, |
| "grad_norm": 0.17882536351680756, |
| "learning_rate": 1.4091767381703657e-05, |
| "loss": 0.0245, |
| "step": 11510 |
| }, |
| { |
| "epoch": 5.021795989537925, |
| "grad_norm": 0.556475818157196, |
| "learning_rate": 1.4015147683671087e-05, |
| "loss": 0.0321, |
| "step": 11520 |
| }, |
| { |
| "epoch": 5.02615518744551, |
| "grad_norm": 0.26074153184890747, |
| "learning_rate": 1.3938702885540239e-05, |
| "loss": 0.0257, |
| "step": 11530 |
| }, |
| { |
| "epoch": 5.030514385353095, |
| "grad_norm": 0.5722861886024475, |
| "learning_rate": 1.3862433358861576e-05, |
| "loss": 0.038, |
| "step": 11540 |
| }, |
| { |
| "epoch": 5.03487358326068, |
| "grad_norm": 0.5401778221130371, |
| "learning_rate": 1.3786339474333636e-05, |
| "loss": 0.0378, |
| "step": 11550 |
| }, |
| { |
| "epoch": 5.039232781168265, |
| "grad_norm": 0.5791760683059692, |
| "learning_rate": 1.3710421601801265e-05, |
| "loss": 0.0338, |
| "step": 11560 |
| }, |
| { |
| "epoch": 5.04359197907585, |
| "grad_norm": 0.31818994879722595, |
| "learning_rate": 1.3634680110253883e-05, |
| "loss": 0.0296, |
| "step": 11570 |
| }, |
| { |
| "epoch": 5.047951176983435, |
| "grad_norm": 0.6561838984489441, |
| "learning_rate": 1.3559115367823556e-05, |
| "loss": 0.0365, |
| "step": 11580 |
| }, |
| { |
| "epoch": 5.05231037489102, |
| "grad_norm": 0.8950375318527222, |
| "learning_rate": 1.3483727741783342e-05, |
| "loss": 0.0383, |
| "step": 11590 |
| }, |
| { |
| "epoch": 5.056669572798605, |
| "grad_norm": 0.3717140257358551, |
| "learning_rate": 1.3408517598545444e-05, |
| "loss": 0.0292, |
| "step": 11600 |
| }, |
| { |
| "epoch": 5.06102877070619, |
| "grad_norm": 0.31723707914352417, |
| "learning_rate": 1.3333485303659381e-05, |
| "loss": 0.0413, |
| "step": 11610 |
| }, |
| { |
| "epoch": 5.065387968613775, |
| "grad_norm": 0.5031527876853943, |
| "learning_rate": 1.3258631221810331e-05, |
| "loss": 0.0457, |
| "step": 11620 |
| }, |
| { |
| "epoch": 5.06974716652136, |
| "grad_norm": 0.41329917311668396, |
| "learning_rate": 1.3183955716817232e-05, |
| "loss": 0.0521, |
| "step": 11630 |
| }, |
| { |
| "epoch": 5.074106364428945, |
| "grad_norm": 1.1271079778671265, |
| "learning_rate": 1.3109459151631076e-05, |
| "loss": 0.0304, |
| "step": 11640 |
| }, |
| { |
| "epoch": 5.07846556233653, |
| "grad_norm": 0.7091724276542664, |
| "learning_rate": 1.3035141888333202e-05, |
| "loss": 0.0406, |
| "step": 11650 |
| }, |
| { |
| "epoch": 5.082824760244115, |
| "grad_norm": 0.43470293283462524, |
| "learning_rate": 1.2961004288133388e-05, |
| "loss": 0.0356, |
| "step": 11660 |
| }, |
| { |
| "epoch": 5.0871839581517, |
| "grad_norm": 0.7189029455184937, |
| "learning_rate": 1.2887046711368245e-05, |
| "loss": 0.0336, |
| "step": 11670 |
| }, |
| { |
| "epoch": 5.091543156059285, |
| "grad_norm": 0.4963429868221283, |
| "learning_rate": 1.2813269517499399e-05, |
| "loss": 0.0405, |
| "step": 11680 |
| }, |
| { |
| "epoch": 5.09590235396687, |
| "grad_norm": 0.7285826206207275, |
| "learning_rate": 1.273967306511169e-05, |
| "loss": 0.0334, |
| "step": 11690 |
| }, |
| { |
| "epoch": 5.100261551874455, |
| "grad_norm": 0.5211066007614136, |
| "learning_rate": 1.2666257711911566e-05, |
| "loss": 0.0348, |
| "step": 11700 |
| }, |
| { |
| "epoch": 5.10462074978204, |
| "grad_norm": 0.2556770145893097, |
| "learning_rate": 1.2593023814725214e-05, |
| "loss": 0.0293, |
| "step": 11710 |
| }, |
| { |
| "epoch": 5.108979947689625, |
| "grad_norm": 1.1604270935058594, |
| "learning_rate": 1.251997172949686e-05, |
| "loss": 0.0379, |
| "step": 11720 |
| }, |
| { |
| "epoch": 5.11333914559721, |
| "grad_norm": 0.6733997464179993, |
| "learning_rate": 1.2447101811287109e-05, |
| "loss": 0.0282, |
| "step": 11730 |
| }, |
| { |
| "epoch": 5.117698343504795, |
| "grad_norm": 0.3374277949333191, |
| "learning_rate": 1.237441441427114e-05, |
| "loss": 0.0355, |
| "step": 11740 |
| }, |
| { |
| "epoch": 5.12205754141238, |
| "grad_norm": 0.565790593624115, |
| "learning_rate": 1.2301909891737018e-05, |
| "loss": 0.0401, |
| "step": 11750 |
| }, |
| { |
| "epoch": 5.126416739319965, |
| "grad_norm": 0.21954980492591858, |
| "learning_rate": 1.2229588596083957e-05, |
| "loss": 0.0337, |
| "step": 11760 |
| }, |
| { |
| "epoch": 5.1307759372275505, |
| "grad_norm": 0.48581477999687195, |
| "learning_rate": 1.2157450878820608e-05, |
| "loss": 0.0364, |
| "step": 11770 |
| }, |
| { |
| "epoch": 5.135135135135135, |
| "grad_norm": 0.9423235654830933, |
| "learning_rate": 1.2085497090563407e-05, |
| "loss": 0.0289, |
| "step": 11780 |
| }, |
| { |
| "epoch": 5.1394943330427205, |
| "grad_norm": 1.247639536857605, |
| "learning_rate": 1.2013727581034783e-05, |
| "loss": 0.0304, |
| "step": 11790 |
| }, |
| { |
| "epoch": 5.143853530950305, |
| "grad_norm": 0.683613121509552, |
| "learning_rate": 1.1942142699061498e-05, |
| "loss": 0.0416, |
| "step": 11800 |
| }, |
| { |
| "epoch": 5.1482127288578905, |
| "grad_norm": 0.4748249053955078, |
| "learning_rate": 1.1870742792572992e-05, |
| "loss": 0.0417, |
| "step": 11810 |
| }, |
| { |
| "epoch": 5.152571926765475, |
| "grad_norm": 0.8540623784065247, |
| "learning_rate": 1.1799528208599637e-05, |
| "loss": 0.0354, |
| "step": 11820 |
| }, |
| { |
| "epoch": 5.1569311246730605, |
| "grad_norm": 0.8364782929420471, |
| "learning_rate": 1.1728499293271079e-05, |
| "loss": 0.0485, |
| "step": 11830 |
| }, |
| { |
| "epoch": 5.161290322580645, |
| "grad_norm": 0.3155808448791504, |
| "learning_rate": 1.1657656391814509e-05, |
| "loss": 0.0285, |
| "step": 11840 |
| }, |
| { |
| "epoch": 5.1656495204882305, |
| "grad_norm": 0.7690255045890808, |
| "learning_rate": 1.1586999848553043e-05, |
| "loss": 0.0305, |
| "step": 11850 |
| }, |
| { |
| "epoch": 5.170008718395815, |
| "grad_norm": 0.48023709654808044, |
| "learning_rate": 1.1516530006904053e-05, |
| "loss": 0.0385, |
| "step": 11860 |
| }, |
| { |
| "epoch": 5.1743679163034, |
| "grad_norm": 0.5498557686805725, |
| "learning_rate": 1.1446247209377403e-05, |
| "loss": 0.0317, |
| "step": 11870 |
| }, |
| { |
| "epoch": 5.178727114210985, |
| "grad_norm": 0.3345494568347931, |
| "learning_rate": 1.1376151797573925e-05, |
| "loss": 0.0376, |
| "step": 11880 |
| }, |
| { |
| "epoch": 5.18308631211857, |
| "grad_norm": 0.9440948963165283, |
| "learning_rate": 1.1306244112183662e-05, |
| "loss": 0.0325, |
| "step": 11890 |
| }, |
| { |
| "epoch": 5.187445510026155, |
| "grad_norm": 0.6693992018699646, |
| "learning_rate": 1.1236524492984203e-05, |
| "loss": 0.0351, |
| "step": 11900 |
| }, |
| { |
| "epoch": 5.19180470793374, |
| "grad_norm": 0.614152193069458, |
| "learning_rate": 1.116699327883911e-05, |
| "loss": 0.0351, |
| "step": 11910 |
| }, |
| { |
| "epoch": 5.196163905841325, |
| "grad_norm": 0.8623160719871521, |
| "learning_rate": 1.1097650807696209e-05, |
| "loss": 0.0332, |
| "step": 11920 |
| }, |
| { |
| "epoch": 5.20052310374891, |
| "grad_norm": 0.40504971146583557, |
| "learning_rate": 1.1028497416585931e-05, |
| "loss": 0.0338, |
| "step": 11930 |
| }, |
| { |
| "epoch": 5.204882301656495, |
| "grad_norm": 0.39141932129859924, |
| "learning_rate": 1.0959533441619762e-05, |
| "loss": 0.0309, |
| "step": 11940 |
| }, |
| { |
| "epoch": 5.20924149956408, |
| "grad_norm": 1.2070059776306152, |
| "learning_rate": 1.0890759217988527e-05, |
| "loss": 0.0261, |
| "step": 11950 |
| }, |
| { |
| "epoch": 5.213600697471665, |
| "grad_norm": 0.6789277195930481, |
| "learning_rate": 1.0822175079960806e-05, |
| "loss": 0.0258, |
| "step": 11960 |
| }, |
| { |
| "epoch": 5.21795989537925, |
| "grad_norm": 0.546126127243042, |
| "learning_rate": 1.0753781360881265e-05, |
| "loss": 0.0337, |
| "step": 11970 |
| }, |
| { |
| "epoch": 5.222319093286835, |
| "grad_norm": 0.6033177375793457, |
| "learning_rate": 1.0685578393169055e-05, |
| "loss": 0.0384, |
| "step": 11980 |
| }, |
| { |
| "epoch": 5.22667829119442, |
| "grad_norm": 0.48450666666030884, |
| "learning_rate": 1.061756650831625e-05, |
| "loss": 0.0343, |
| "step": 11990 |
| }, |
| { |
| "epoch": 5.231037489102005, |
| "grad_norm": 0.42560434341430664, |
| "learning_rate": 1.054974603688616e-05, |
| "loss": 0.0387, |
| "step": 12000 |
| }, |
| { |
| "epoch": 5.23539668700959, |
| "grad_norm": 0.333484947681427, |
| "learning_rate": 1.048211730851173e-05, |
| "loss": 0.0352, |
| "step": 12010 |
| }, |
| { |
| "epoch": 5.239755884917175, |
| "grad_norm": 0.6437160968780518, |
| "learning_rate": 1.0414680651894004e-05, |
| "loss": 0.0338, |
| "step": 12020 |
| }, |
| { |
| "epoch": 5.24411508282476, |
| "grad_norm": 0.3939863443374634, |
| "learning_rate": 1.034743639480047e-05, |
| "loss": 0.0289, |
| "step": 12030 |
| }, |
| { |
| "epoch": 5.248474280732346, |
| "grad_norm": 0.48622220754623413, |
| "learning_rate": 1.0280384864063497e-05, |
| "loss": 0.0316, |
| "step": 12040 |
| }, |
| { |
| "epoch": 5.25283347863993, |
| "grad_norm": 1.1890780925750732, |
| "learning_rate": 1.0213526385578704e-05, |
| "loss": 0.0396, |
| "step": 12050 |
| }, |
| { |
| "epoch": 5.257192676547516, |
| "grad_norm": 0.6726014614105225, |
| "learning_rate": 1.0146861284303394e-05, |
| "loss": 0.0326, |
| "step": 12060 |
| }, |
| { |
| "epoch": 5.2615518744551, |
| "grad_norm": 0.46193766593933105, |
| "learning_rate": 1.0080389884255037e-05, |
| "loss": 0.0467, |
| "step": 12070 |
| }, |
| { |
| "epoch": 5.265911072362686, |
| "grad_norm": 0.9077117443084717, |
| "learning_rate": 1.0014112508509588e-05, |
| "loss": 0.041, |
| "step": 12080 |
| }, |
| { |
| "epoch": 5.27027027027027, |
| "grad_norm": 0.4619888365268707, |
| "learning_rate": 9.948029479199994e-06, |
| "loss": 0.0318, |
| "step": 12090 |
| }, |
| { |
| "epoch": 5.274629468177856, |
| "grad_norm": 0.4092944860458374, |
| "learning_rate": 9.882141117514632e-06, |
| "loss": 0.0429, |
| "step": 12100 |
| }, |
| { |
| "epoch": 5.27898866608544, |
| "grad_norm": 0.3199276030063629, |
| "learning_rate": 9.816447743695656e-06, |
| "loss": 0.0315, |
| "step": 12110 |
| }, |
| { |
| "epoch": 5.283347863993026, |
| "grad_norm": 0.3841368556022644, |
| "learning_rate": 9.75094967703758e-06, |
| "loss": 0.0245, |
| "step": 12120 |
| }, |
| { |
| "epoch": 5.28770706190061, |
| "grad_norm": 0.7042669057846069, |
| "learning_rate": 9.685647235885597e-06, |
| "loss": 0.0424, |
| "step": 12130 |
| }, |
| { |
| "epoch": 5.292066259808196, |
| "grad_norm": 0.379041850566864, |
| "learning_rate": 9.620540737634087e-06, |
| "loss": 0.0303, |
| "step": 12140 |
| }, |
| { |
| "epoch": 5.29642545771578, |
| "grad_norm": 0.23851239681243896, |
| "learning_rate": 9.555630498725133e-06, |
| "loss": 0.026, |
| "step": 12150 |
| }, |
| { |
| "epoch": 5.3007846556233655, |
| "grad_norm": 0.503830075263977, |
| "learning_rate": 9.49091683464684e-06, |
| "loss": 0.0251, |
| "step": 12160 |
| }, |
| { |
| "epoch": 5.30514385353095, |
| "grad_norm": 0.8782485127449036, |
| "learning_rate": 9.426400059931955e-06, |
| "loss": 0.0361, |
| "step": 12170 |
| }, |
| { |
| "epoch": 5.3095030514385355, |
| "grad_norm": 1.1160036325454712, |
| "learning_rate": 9.362080488156245e-06, |
| "loss": 0.0438, |
| "step": 12180 |
| }, |
| { |
| "epoch": 5.31386224934612, |
| "grad_norm": 0.3587688207626343, |
| "learning_rate": 9.29795843193697e-06, |
| "loss": 0.0276, |
| "step": 12190 |
| }, |
| { |
| "epoch": 5.3182214472537055, |
| "grad_norm": 0.7131944298744202, |
| "learning_rate": 9.234034202931447e-06, |
| "loss": 0.0407, |
| "step": 12200 |
| }, |
| { |
| "epoch": 5.32258064516129, |
| "grad_norm": 0.38495564460754395, |
| "learning_rate": 9.170308111835418e-06, |
| "loss": 0.0416, |
| "step": 12210 |
| }, |
| { |
| "epoch": 5.3269398430688755, |
| "grad_norm": 0.32060274481773376, |
| "learning_rate": 9.106780468381631e-06, |
| "loss": 0.0316, |
| "step": 12220 |
| }, |
| { |
| "epoch": 5.33129904097646, |
| "grad_norm": 1.0149906873703003, |
| "learning_rate": 9.043451581338302e-06, |
| "loss": 0.0384, |
| "step": 12230 |
| }, |
| { |
| "epoch": 5.3356582388840454, |
| "grad_norm": 0.6786044836044312, |
| "learning_rate": 8.980321758507615e-06, |
| "loss": 0.0346, |
| "step": 12240 |
| }, |
| { |
| "epoch": 5.34001743679163, |
| "grad_norm": 0.4378306567668915, |
| "learning_rate": 8.91739130672425e-06, |
| "loss": 0.0442, |
| "step": 12250 |
| }, |
| { |
| "epoch": 5.344376634699215, |
| "grad_norm": 0.402309775352478, |
| "learning_rate": 8.85466053185382e-06, |
| "loss": 0.0371, |
| "step": 12260 |
| }, |
| { |
| "epoch": 5.3487358326068, |
| "grad_norm": 0.21001383662223816, |
| "learning_rate": 8.792129738791455e-06, |
| "loss": 0.0299, |
| "step": 12270 |
| }, |
| { |
| "epoch": 5.353095030514385, |
| "grad_norm": 0.3518972396850586, |
| "learning_rate": 8.729799231460318e-06, |
| "loss": 0.0319, |
| "step": 12280 |
| }, |
| { |
| "epoch": 5.35745422842197, |
| "grad_norm": 0.4094241261482239, |
| "learning_rate": 8.66766931281009e-06, |
| "loss": 0.0329, |
| "step": 12290 |
| }, |
| { |
| "epoch": 5.361813426329555, |
| "grad_norm": 0.3522825837135315, |
| "learning_rate": 8.6057402848155e-06, |
| "loss": 0.0247, |
| "step": 12300 |
| }, |
| { |
| "epoch": 5.366172624237141, |
| "grad_norm": 0.4518503248691559, |
| "learning_rate": 8.544012448474904e-06, |
| "loss": 0.0345, |
| "step": 12310 |
| }, |
| { |
| "epoch": 5.370531822144725, |
| "grad_norm": 0.5210450291633606, |
| "learning_rate": 8.482486103808779e-06, |
| "loss": 0.0418, |
| "step": 12320 |
| }, |
| { |
| "epoch": 5.374891020052311, |
| "grad_norm": 0.30797097086906433, |
| "learning_rate": 8.42116154985828e-06, |
| "loss": 0.0305, |
| "step": 12330 |
| }, |
| { |
| "epoch": 5.379250217959895, |
| "grad_norm": 0.4493660628795624, |
| "learning_rate": 8.360039084683779e-06, |
| "loss": 0.0319, |
| "step": 12340 |
| }, |
| { |
| "epoch": 5.383609415867481, |
| "grad_norm": 0.4356549382209778, |
| "learning_rate": 8.299119005363404e-06, |
| "loss": 0.03, |
| "step": 12350 |
| }, |
| { |
| "epoch": 5.387968613775065, |
| "grad_norm": 1.2415368556976318, |
| "learning_rate": 8.238401607991647e-06, |
| "loss": 0.042, |
| "step": 12360 |
| }, |
| { |
| "epoch": 5.392327811682651, |
| "grad_norm": 0.3500710129737854, |
| "learning_rate": 8.177887187677847e-06, |
| "loss": 0.0279, |
| "step": 12370 |
| }, |
| { |
| "epoch": 5.396687009590235, |
| "grad_norm": 0.3411845862865448, |
| "learning_rate": 8.117576038544838e-06, |
| "loss": 0.0328, |
| "step": 12380 |
| }, |
| { |
| "epoch": 5.401046207497821, |
| "grad_norm": 0.4249480366706848, |
| "learning_rate": 8.057468453727479e-06, |
| "loss": 0.0434, |
| "step": 12390 |
| }, |
| { |
| "epoch": 5.405405405405405, |
| "grad_norm": 0.3020738959312439, |
| "learning_rate": 7.997564725371182e-06, |
| "loss": 0.037, |
| "step": 12400 |
| }, |
| { |
| "epoch": 5.409764603312991, |
| "grad_norm": 0.4569971561431885, |
| "learning_rate": 7.937865144630601e-06, |
| "loss": 0.0428, |
| "step": 12410 |
| }, |
| { |
| "epoch": 5.414123801220575, |
| "grad_norm": 0.22348935902118683, |
| "learning_rate": 7.878370001668116e-06, |
| "loss": 0.0271, |
| "step": 12420 |
| }, |
| { |
| "epoch": 5.418482999128161, |
| "grad_norm": 0.5622937679290771, |
| "learning_rate": 7.819079585652461e-06, |
| "loss": 0.0438, |
| "step": 12430 |
| }, |
| { |
| "epoch": 5.422842197035745, |
| "grad_norm": 0.46097010374069214, |
| "learning_rate": 7.759994184757358e-06, |
| "loss": 0.04, |
| "step": 12440 |
| }, |
| { |
| "epoch": 5.427201394943331, |
| "grad_norm": 0.6537830829620361, |
| "learning_rate": 7.701114086160027e-06, |
| "loss": 0.0323, |
| "step": 12450 |
| }, |
| { |
| "epoch": 5.431560592850915, |
| "grad_norm": 3.8158767223358154, |
| "learning_rate": 7.642439576039884e-06, |
| "loss": 0.0369, |
| "step": 12460 |
| }, |
| { |
| "epoch": 5.435919790758501, |
| "grad_norm": 0.2568453550338745, |
| "learning_rate": 7.583970939577101e-06, |
| "loss": 0.0288, |
| "step": 12470 |
| }, |
| { |
| "epoch": 5.440278988666085, |
| "grad_norm": 0.2281847447156906, |
| "learning_rate": 7.525708460951197e-06, |
| "loss": 0.0245, |
| "step": 12480 |
| }, |
| { |
| "epoch": 5.444638186573671, |
| "grad_norm": 0.7292711138725281, |
| "learning_rate": 7.467652423339733e-06, |
| "loss": 0.0312, |
| "step": 12490 |
| }, |
| { |
| "epoch": 5.448997384481255, |
| "grad_norm": 0.2882106602191925, |
| "learning_rate": 7.409803108916841e-06, |
| "loss": 0.0375, |
| "step": 12500 |
| }, |
| { |
| "epoch": 5.453356582388841, |
| "grad_norm": 0.5214361548423767, |
| "learning_rate": 7.35216079885192e-06, |
| "loss": 0.0422, |
| "step": 12510 |
| }, |
| { |
| "epoch": 5.457715780296425, |
| "grad_norm": 0.42829445004463196, |
| "learning_rate": 7.29472577330827e-06, |
| "loss": 0.0288, |
| "step": 12520 |
| }, |
| { |
| "epoch": 5.4620749782040106, |
| "grad_norm": 0.5174663662910461, |
| "learning_rate": 7.237498311441676e-06, |
| "loss": 0.026, |
| "step": 12530 |
| }, |
| { |
| "epoch": 5.466434176111595, |
| "grad_norm": 0.4603443741798401, |
| "learning_rate": 7.180478691399134e-06, |
| "loss": 0.0334, |
| "step": 12540 |
| }, |
| { |
| "epoch": 5.4707933740191805, |
| "grad_norm": 0.8484715223312378, |
| "learning_rate": 7.123667190317396e-06, |
| "loss": 0.0392, |
| "step": 12550 |
| }, |
| { |
| "epoch": 5.475152571926765, |
| "grad_norm": 0.5899950265884399, |
| "learning_rate": 7.06706408432169e-06, |
| "loss": 0.0327, |
| "step": 12560 |
| }, |
| { |
| "epoch": 5.4795117698343505, |
| "grad_norm": 0.9594012498855591, |
| "learning_rate": 7.010669648524404e-06, |
| "loss": 0.0308, |
| "step": 12570 |
| }, |
| { |
| "epoch": 5.483870967741936, |
| "grad_norm": 0.795964777469635, |
| "learning_rate": 6.954484157023661e-06, |
| "loss": 0.0279, |
| "step": 12580 |
| }, |
| { |
| "epoch": 5.4882301656495205, |
| "grad_norm": 0.46093374490737915, |
| "learning_rate": 6.898507882902078e-06, |
| "loss": 0.039, |
| "step": 12590 |
| }, |
| { |
| "epoch": 5.492589363557105, |
| "grad_norm": 0.5869463086128235, |
| "learning_rate": 6.842741098225358e-06, |
| "loss": 0.0359, |
| "step": 12600 |
| }, |
| { |
| "epoch": 5.4969485614646905, |
| "grad_norm": 0.4688364565372467, |
| "learning_rate": 6.787184074041031e-06, |
| "loss": 0.0231, |
| "step": 12610 |
| }, |
| { |
| "epoch": 5.501307759372276, |
| "grad_norm": 0.3394297957420349, |
| "learning_rate": 6.731837080377129e-06, |
| "loss": 0.0267, |
| "step": 12620 |
| }, |
| { |
| "epoch": 5.50566695727986, |
| "grad_norm": 0.46087536215782166, |
| "learning_rate": 6.676700386240814e-06, |
| "loss": 0.0236, |
| "step": 12630 |
| }, |
| { |
| "epoch": 5.510026155187446, |
| "grad_norm": 0.48075783252716064, |
| "learning_rate": 6.621774259617125e-06, |
| "loss": 0.0451, |
| "step": 12640 |
| }, |
| { |
| "epoch": 5.51438535309503, |
| "grad_norm": 0.23513250052928925, |
| "learning_rate": 6.567058967467704e-06, |
| "loss": 0.028, |
| "step": 12650 |
| }, |
| { |
| "epoch": 5.518744551002616, |
| "grad_norm": 0.5110394358634949, |
| "learning_rate": 6.51255477572939e-06, |
| "loss": 0.0384, |
| "step": 12660 |
| }, |
| { |
| "epoch": 5.5231037489102, |
| "grad_norm": 0.47604143619537354, |
| "learning_rate": 6.45826194931306e-06, |
| "loss": 0.0326, |
| "step": 12670 |
| }, |
| { |
| "epoch": 5.527462946817786, |
| "grad_norm": 1.0370049476623535, |
| "learning_rate": 6.4041807521022454e-06, |
| "loss": 0.0338, |
| "step": 12680 |
| }, |
| { |
| "epoch": 5.53182214472537, |
| "grad_norm": 0.7761918902397156, |
| "learning_rate": 6.350311446951868e-06, |
| "loss": 0.0323, |
| "step": 12690 |
| }, |
| { |
| "epoch": 5.536181342632956, |
| "grad_norm": 0.5699298977851868, |
| "learning_rate": 6.29665429568701e-06, |
| "loss": 0.0307, |
| "step": 12700 |
| }, |
| { |
| "epoch": 5.54054054054054, |
| "grad_norm": 0.47337207198143005, |
| "learning_rate": 6.2432095591015705e-06, |
| "loss": 0.0235, |
| "step": 12710 |
| }, |
| { |
| "epoch": 5.544899738448126, |
| "grad_norm": 0.694935142993927, |
| "learning_rate": 6.1899774969570444e-06, |
| "loss": 0.035, |
| "step": 12720 |
| }, |
| { |
| "epoch": 5.54925893635571, |
| "grad_norm": 0.2715081572532654, |
| "learning_rate": 6.136958367981272e-06, |
| "loss": 0.0288, |
| "step": 12730 |
| }, |
| { |
| "epoch": 5.553618134263296, |
| "grad_norm": 0.588737964630127, |
| "learning_rate": 6.084152429867113e-06, |
| "loss": 0.0341, |
| "step": 12740 |
| }, |
| { |
| "epoch": 5.55797733217088, |
| "grad_norm": 0.3322398066520691, |
| "learning_rate": 6.0315599392712865e-06, |
| "loss": 0.0248, |
| "step": 12750 |
| }, |
| { |
| "epoch": 5.562336530078466, |
| "grad_norm": 0.6606812477111816, |
| "learning_rate": 5.979181151813057e-06, |
| "loss": 0.0438, |
| "step": 12760 |
| }, |
| { |
| "epoch": 5.56669572798605, |
| "grad_norm": 0.5031450390815735, |
| "learning_rate": 5.927016322072992e-06, |
| "loss": 0.0434, |
| "step": 12770 |
| }, |
| { |
| "epoch": 5.571054925893636, |
| "grad_norm": 0.44513940811157227, |
| "learning_rate": 5.875065703591787e-06, |
| "loss": 0.0377, |
| "step": 12780 |
| }, |
| { |
| "epoch": 5.57541412380122, |
| "grad_norm": 0.3935040533542633, |
| "learning_rate": 5.823329548868939e-06, |
| "loss": 0.0295, |
| "step": 12790 |
| }, |
| { |
| "epoch": 5.579773321708806, |
| "grad_norm": 0.6372385621070862, |
| "learning_rate": 5.77180810936162e-06, |
| "loss": 0.0295, |
| "step": 12800 |
| }, |
| { |
| "epoch": 5.58413251961639, |
| "grad_norm": 0.35186004638671875, |
| "learning_rate": 5.720501635483366e-06, |
| "loss": 0.0349, |
| "step": 12810 |
| }, |
| { |
| "epoch": 5.588491717523976, |
| "grad_norm": 0.8061618208885193, |
| "learning_rate": 5.669410376602918e-06, |
| "loss": 0.0345, |
| "step": 12820 |
| }, |
| { |
| "epoch": 5.59285091543156, |
| "grad_norm": 0.6604674458503723, |
| "learning_rate": 5.618534581043011e-06, |
| "loss": 0.0353, |
| "step": 12830 |
| }, |
| { |
| "epoch": 5.597210113339146, |
| "grad_norm": 0.7145407199859619, |
| "learning_rate": 5.5678744960791005e-06, |
| "loss": 0.0277, |
| "step": 12840 |
| }, |
| { |
| "epoch": 5.601569311246731, |
| "grad_norm": 0.3610726296901703, |
| "learning_rate": 5.517430367938237e-06, |
| "loss": 0.0308, |
| "step": 12850 |
| }, |
| { |
| "epoch": 5.605928509154316, |
| "grad_norm": 0.48084914684295654, |
| "learning_rate": 5.467202441797842e-06, |
| "loss": 0.0322, |
| "step": 12860 |
| }, |
| { |
| "epoch": 5.6102877070619, |
| "grad_norm": 0.6275615096092224, |
| "learning_rate": 5.417190961784497e-06, |
| "loss": 0.0315, |
| "step": 12870 |
| }, |
| { |
| "epoch": 5.614646904969486, |
| "grad_norm": 0.44279929995536804, |
| "learning_rate": 5.3673961709727885e-06, |
| "loss": 0.0272, |
| "step": 12880 |
| }, |
| { |
| "epoch": 5.619006102877071, |
| "grad_norm": 0.5052009224891663, |
| "learning_rate": 5.317818311384115e-06, |
| "loss": 0.032, |
| "step": 12890 |
| }, |
| { |
| "epoch": 5.623365300784656, |
| "grad_norm": 0.48991960287094116, |
| "learning_rate": 5.2684576239854895e-06, |
| "loss": 0.0356, |
| "step": 12900 |
| }, |
| { |
| "epoch": 5.627724498692241, |
| "grad_norm": 0.6088931560516357, |
| "learning_rate": 5.219314348688414e-06, |
| "loss": 0.0298, |
| "step": 12910 |
| }, |
| { |
| "epoch": 5.6320836965998256, |
| "grad_norm": 0.35137468576431274, |
| "learning_rate": 5.170388724347658e-06, |
| "loss": 0.0266, |
| "step": 12920 |
| }, |
| { |
| "epoch": 5.636442894507411, |
| "grad_norm": 0.8328220248222351, |
| "learning_rate": 5.1216809887601245e-06, |
| "loss": 0.0292, |
| "step": 12930 |
| }, |
| { |
| "epoch": 5.6408020924149955, |
| "grad_norm": 0.33078819513320923, |
| "learning_rate": 5.073191378663733e-06, |
| "loss": 0.0291, |
| "step": 12940 |
| }, |
| { |
| "epoch": 5.645161290322581, |
| "grad_norm": 0.2188701182603836, |
| "learning_rate": 5.024920129736188e-06, |
| "loss": 0.0233, |
| "step": 12950 |
| }, |
| { |
| "epoch": 5.6495204882301655, |
| "grad_norm": 0.602378785610199, |
| "learning_rate": 4.976867476593894e-06, |
| "loss": 0.0244, |
| "step": 12960 |
| }, |
| { |
| "epoch": 5.653879686137751, |
| "grad_norm": 0.6373008489608765, |
| "learning_rate": 4.929033652790821e-06, |
| "loss": 0.0245, |
| "step": 12970 |
| }, |
| { |
| "epoch": 5.6582388840453355, |
| "grad_norm": 0.5099015831947327, |
| "learning_rate": 4.881418890817296e-06, |
| "loss": 0.0261, |
| "step": 12980 |
| }, |
| { |
| "epoch": 5.662598081952921, |
| "grad_norm": 1.1864821910858154, |
| "learning_rate": 4.834023422098971e-06, |
| "loss": 0.0251, |
| "step": 12990 |
| }, |
| { |
| "epoch": 5.6669572798605055, |
| "grad_norm": 0.4129869043827057, |
| "learning_rate": 4.7868474769956266e-06, |
| "loss": 0.0294, |
| "step": 13000 |
| }, |
| { |
| "epoch": 5.671316477768091, |
| "grad_norm": 0.45849207043647766, |
| "learning_rate": 4.7398912848000636e-06, |
| "loss": 0.0399, |
| "step": 13010 |
| }, |
| { |
| "epoch": 5.675675675675675, |
| "grad_norm": 0.9468610882759094, |
| "learning_rate": 4.6931550737370264e-06, |
| "loss": 0.0347, |
| "step": 13020 |
| }, |
| { |
| "epoch": 5.680034873583261, |
| "grad_norm": 1.0548272132873535, |
| "learning_rate": 4.646639070962067e-06, |
| "loss": 0.0387, |
| "step": 13030 |
| }, |
| { |
| "epoch": 5.684394071490845, |
| "grad_norm": 0.408389151096344, |
| "learning_rate": 4.600343502560439e-06, |
| "loss": 0.0272, |
| "step": 13040 |
| }, |
| { |
| "epoch": 5.688753269398431, |
| "grad_norm": 0.36637166142463684, |
| "learning_rate": 4.55426859354599e-06, |
| "loss": 0.021, |
| "step": 13050 |
| }, |
| { |
| "epoch": 5.693112467306015, |
| "grad_norm": 0.21423012018203735, |
| "learning_rate": 4.5084145678600805e-06, |
| "loss": 0.027, |
| "step": 13060 |
| }, |
| { |
| "epoch": 5.697471665213601, |
| "grad_norm": 0.29542121291160583, |
| "learning_rate": 4.462781648370518e-06, |
| "loss": 0.0399, |
| "step": 13070 |
| }, |
| { |
| "epoch": 5.701830863121185, |
| "grad_norm": 0.4850723147392273, |
| "learning_rate": 4.417370056870418e-06, |
| "loss": 0.0304, |
| "step": 13080 |
| }, |
| { |
| "epoch": 5.706190061028771, |
| "grad_norm": 0.9915159344673157, |
| "learning_rate": 4.372180014077193e-06, |
| "loss": 0.0303, |
| "step": 13090 |
| }, |
| { |
| "epoch": 5.710549258936355, |
| "grad_norm": 0.1932496577501297, |
| "learning_rate": 4.327211739631415e-06, |
| "loss": 0.027, |
| "step": 13100 |
| }, |
| { |
| "epoch": 5.714908456843941, |
| "grad_norm": 0.26505881547927856, |
| "learning_rate": 4.282465452095802e-06, |
| "loss": 0.0208, |
| "step": 13110 |
| }, |
| { |
| "epoch": 5.719267654751526, |
| "grad_norm": 0.22322647273540497, |
| "learning_rate": 4.237941368954124e-06, |
| "loss": 0.0239, |
| "step": 13120 |
| }, |
| { |
| "epoch": 5.723626852659111, |
| "grad_norm": 0.40861910581588745, |
| "learning_rate": 4.193639706610147e-06, |
| "loss": 0.0339, |
| "step": 13130 |
| }, |
| { |
| "epoch": 5.727986050566695, |
| "grad_norm": 0.7970739603042603, |
| "learning_rate": 4.149560680386588e-06, |
| "loss": 0.0282, |
| "step": 13140 |
| }, |
| { |
| "epoch": 5.732345248474281, |
| "grad_norm": 0.8208338022232056, |
| "learning_rate": 4.105704504524094e-06, |
| "loss": 0.0314, |
| "step": 13150 |
| }, |
| { |
| "epoch": 5.736704446381866, |
| "grad_norm": 0.4076552987098694, |
| "learning_rate": 4.0620713921801334e-06, |
| "loss": 0.02, |
| "step": 13160 |
| }, |
| { |
| "epoch": 5.741063644289451, |
| "grad_norm": 0.3223102390766144, |
| "learning_rate": 4.0186615554280385e-06, |
| "loss": 0.0232, |
| "step": 13170 |
| }, |
| { |
| "epoch": 5.745422842197035, |
| "grad_norm": 0.4325304925441742, |
| "learning_rate": 3.975475205255929e-06, |
| "loss": 0.0306, |
| "step": 13180 |
| }, |
| { |
| "epoch": 5.749782040104621, |
| "grad_norm": 0.40523847937583923, |
| "learning_rate": 3.932512551565676e-06, |
| "loss": 0.0249, |
| "step": 13190 |
| }, |
| { |
| "epoch": 5.754141238012206, |
| "grad_norm": 0.4874592423439026, |
| "learning_rate": 3.889773803171936e-06, |
| "loss": 0.0345, |
| "step": 13200 |
| }, |
| { |
| "epoch": 5.758500435919791, |
| "grad_norm": 0.5539955496788025, |
| "learning_rate": 3.847259167801076e-06, |
| "loss": 0.0184, |
| "step": 13210 |
| }, |
| { |
| "epoch": 5.762859633827376, |
| "grad_norm": 0.22394351661205292, |
| "learning_rate": 3.804968852090185e-06, |
| "loss": 0.0255, |
| "step": 13220 |
| }, |
| { |
| "epoch": 5.767218831734961, |
| "grad_norm": 0.28927505016326904, |
| "learning_rate": 3.762903061586104e-06, |
| "loss": 0.0325, |
| "step": 13230 |
| }, |
| { |
| "epoch": 5.771578029642546, |
| "grad_norm": 0.401094913482666, |
| "learning_rate": 3.721062000744363e-06, |
| "loss": 0.0371, |
| "step": 13240 |
| }, |
| { |
| "epoch": 5.775937227550131, |
| "grad_norm": 0.35075923800468445, |
| "learning_rate": 3.679445872928244e-06, |
| "loss": 0.0284, |
| "step": 13250 |
| }, |
| { |
| "epoch": 5.780296425457716, |
| "grad_norm": 0.6507555246353149, |
| "learning_rate": 3.6380548804077707e-06, |
| "loss": 0.0272, |
| "step": 13260 |
| }, |
| { |
| "epoch": 5.784655623365301, |
| "grad_norm": 0.270612895488739, |
| "learning_rate": 3.5968892243587016e-06, |
| "loss": 0.0268, |
| "step": 13270 |
| }, |
| { |
| "epoch": 5.789014821272886, |
| "grad_norm": 0.5847600102424622, |
| "learning_rate": 3.555949104861611e-06, |
| "loss": 0.0259, |
| "step": 13280 |
| }, |
| { |
| "epoch": 5.793374019180471, |
| "grad_norm": 1.932289958000183, |
| "learning_rate": 3.5152347209008394e-06, |
| "loss": 0.0279, |
| "step": 13290 |
| }, |
| { |
| "epoch": 5.797733217088056, |
| "grad_norm": 0.4612036943435669, |
| "learning_rate": 3.4747462703636104e-06, |
| "loss": 0.0282, |
| "step": 13300 |
| }, |
| { |
| "epoch": 5.8020924149956405, |
| "grad_norm": 0.5467131733894348, |
| "learning_rate": 3.434483950038986e-06, |
| "loss": 0.0336, |
| "step": 13310 |
| }, |
| { |
| "epoch": 5.806451612903226, |
| "grad_norm": 0.9813832640647888, |
| "learning_rate": 3.3944479556169806e-06, |
| "loss": 0.0346, |
| "step": 13320 |
| }, |
| { |
| "epoch": 5.8108108108108105, |
| "grad_norm": 0.4749749004840851, |
| "learning_rate": 3.3546384816875665e-06, |
| "loss": 0.0308, |
| "step": 13330 |
| }, |
| { |
| "epoch": 5.815170008718396, |
| "grad_norm": 0.4085749089717865, |
| "learning_rate": 3.315055721739746e-06, |
| "loss": 0.0279, |
| "step": 13340 |
| }, |
| { |
| "epoch": 5.8195292066259805, |
| "grad_norm": 0.5082237124443054, |
| "learning_rate": 3.275699868160592e-06, |
| "loss": 0.0269, |
| "step": 13350 |
| }, |
| { |
| "epoch": 5.823888404533566, |
| "grad_norm": 0.3107988238334656, |
| "learning_rate": 3.23657111223436e-06, |
| "loss": 0.0298, |
| "step": 13360 |
| }, |
| { |
| "epoch": 5.8282476024411505, |
| "grad_norm": 0.7513600587844849, |
| "learning_rate": 3.1976696441414764e-06, |
| "loss": 0.0407, |
| "step": 13370 |
| }, |
| { |
| "epoch": 5.832606800348736, |
| "grad_norm": 0.5760849118232727, |
| "learning_rate": 3.158995652957719e-06, |
| "loss": 0.0304, |
| "step": 13380 |
| }, |
| { |
| "epoch": 5.8369659982563205, |
| "grad_norm": 0.3615356981754303, |
| "learning_rate": 3.1205493266531937e-06, |
| "loss": 0.0263, |
| "step": 13390 |
| }, |
| { |
| "epoch": 5.841325196163906, |
| "grad_norm": 0.48539432883262634, |
| "learning_rate": 3.082330852091497e-06, |
| "loss": 0.0344, |
| "step": 13400 |
| }, |
| { |
| "epoch": 5.84568439407149, |
| "grad_norm": 0.6521255373954773, |
| "learning_rate": 3.0443404150287847e-06, |
| "loss": 0.0307, |
| "step": 13410 |
| }, |
| { |
| "epoch": 5.850043591979076, |
| "grad_norm": 0.2879190146923065, |
| "learning_rate": 3.0065782001128475e-06, |
| "loss": 0.0283, |
| "step": 13420 |
| }, |
| { |
| "epoch": 5.854402789886661, |
| "grad_norm": 0.30713582038879395, |
| "learning_rate": 2.9690443908822252e-06, |
| "loss": 0.0254, |
| "step": 13430 |
| }, |
| { |
| "epoch": 5.858761987794246, |
| "grad_norm": 0.3112742304801941, |
| "learning_rate": 2.9317391697653518e-06, |
| "loss": 0.0265, |
| "step": 13440 |
| }, |
| { |
| "epoch": 5.86312118570183, |
| "grad_norm": 0.5539763569831848, |
| "learning_rate": 2.8946627180795936e-06, |
| "loss": 0.0381, |
| "step": 13450 |
| }, |
| { |
| "epoch": 5.867480383609416, |
| "grad_norm": 0.7893513441085815, |
| "learning_rate": 2.8578152160304573e-06, |
| "loss": 0.0382, |
| "step": 13460 |
| }, |
| { |
| "epoch": 5.871839581517001, |
| "grad_norm": 0.4452410042285919, |
| "learning_rate": 2.821196842710638e-06, |
| "loss": 0.0451, |
| "step": 13470 |
| }, |
| { |
| "epoch": 5.876198779424586, |
| "grad_norm": 0.38596704602241516, |
| "learning_rate": 2.7848077760991853e-06, |
| "loss": 0.0331, |
| "step": 13480 |
| }, |
| { |
| "epoch": 5.880557977332171, |
| "grad_norm": 0.4066772162914276, |
| "learning_rate": 2.7486481930606434e-06, |
| "loss": 0.0348, |
| "step": 13490 |
| }, |
| { |
| "epoch": 5.884917175239756, |
| "grad_norm": 0.3776564598083496, |
| "learning_rate": 2.712718269344161e-06, |
| "loss": 0.0225, |
| "step": 13500 |
| }, |
| { |
| "epoch": 5.889276373147341, |
| "grad_norm": 0.2993749678134918, |
| "learning_rate": 2.677018179582669e-06, |
| "loss": 0.0306, |
| "step": 13510 |
| }, |
| { |
| "epoch": 5.893635571054926, |
| "grad_norm": 0.3112500011920929, |
| "learning_rate": 2.641548097292024e-06, |
| "loss": 0.0376, |
| "step": 13520 |
| }, |
| { |
| "epoch": 5.897994768962511, |
| "grad_norm": 0.49900758266448975, |
| "learning_rate": 2.606308194870133e-06, |
| "loss": 0.0337, |
| "step": 13530 |
| }, |
| { |
| "epoch": 5.902353966870096, |
| "grad_norm": 0.49350616335868835, |
| "learning_rate": 2.5712986435961707e-06, |
| "loss": 0.03, |
| "step": 13540 |
| }, |
| { |
| "epoch": 5.906713164777681, |
| "grad_norm": 0.36458122730255127, |
| "learning_rate": 2.536519613629723e-06, |
| "loss": 0.0249, |
| "step": 13550 |
| }, |
| { |
| "epoch": 5.911072362685266, |
| "grad_norm": 0.35984691977500916, |
| "learning_rate": 2.501971274009923e-06, |
| "loss": 0.0378, |
| "step": 13560 |
| }, |
| { |
| "epoch": 5.915431560592851, |
| "grad_norm": 0.41873860359191895, |
| "learning_rate": 2.467653792654695e-06, |
| "loss": 0.0236, |
| "step": 13570 |
| }, |
| { |
| "epoch": 5.919790758500436, |
| "grad_norm": 0.9378716945648193, |
| "learning_rate": 2.4335673363598822e-06, |
| "loss": 0.0271, |
| "step": 13580 |
| }, |
| { |
| "epoch": 5.924149956408021, |
| "grad_norm": 0.380706787109375, |
| "learning_rate": 2.399712070798471e-06, |
| "loss": 0.0351, |
| "step": 13590 |
| }, |
| { |
| "epoch": 5.928509154315606, |
| "grad_norm": 0.42532041668891907, |
| "learning_rate": 2.3660881605197694e-06, |
| "loss": 0.0495, |
| "step": 13600 |
| }, |
| { |
| "epoch": 5.932868352223191, |
| "grad_norm": 0.22512225806713104, |
| "learning_rate": 2.332695768948617e-06, |
| "loss": 0.0243, |
| "step": 13610 |
| }, |
| { |
| "epoch": 5.937227550130776, |
| "grad_norm": 0.4298464059829712, |
| "learning_rate": 2.299535058384583e-06, |
| "loss": 0.0392, |
| "step": 13620 |
| }, |
| { |
| "epoch": 5.941586748038361, |
| "grad_norm": 0.3365100026130676, |
| "learning_rate": 2.266606190001186e-06, |
| "loss": 0.022, |
| "step": 13630 |
| }, |
| { |
| "epoch": 5.945945945945946, |
| "grad_norm": 0.38887470960617065, |
| "learning_rate": 2.2339093238450737e-06, |
| "loss": 0.0251, |
| "step": 13640 |
| }, |
| { |
| "epoch": 5.950305143853531, |
| "grad_norm": 0.8472762703895569, |
| "learning_rate": 2.20144461883533e-06, |
| "loss": 0.0332, |
| "step": 13650 |
| }, |
| { |
| "epoch": 5.954664341761116, |
| "grad_norm": 0.5774504542350769, |
| "learning_rate": 2.1692122327625908e-06, |
| "loss": 0.0352, |
| "step": 13660 |
| }, |
| { |
| "epoch": 5.959023539668701, |
| "grad_norm": 0.41319480538368225, |
| "learning_rate": 2.137212322288379e-06, |
| "loss": 0.0276, |
| "step": 13670 |
| }, |
| { |
| "epoch": 5.963382737576286, |
| "grad_norm": 0.18480074405670166, |
| "learning_rate": 2.105445042944282e-06, |
| "loss": 0.0252, |
| "step": 13680 |
| }, |
| { |
| "epoch": 5.967741935483871, |
| "grad_norm": 0.3829851448535919, |
| "learning_rate": 2.0739105491312027e-06, |
| "loss": 0.0313, |
| "step": 13690 |
| }, |
| { |
| "epoch": 5.972101133391456, |
| "grad_norm": 0.3687523603439331, |
| "learning_rate": 2.0426089941186443e-06, |
| "loss": 0.0264, |
| "step": 13700 |
| }, |
| { |
| "epoch": 5.976460331299041, |
| "grad_norm": 1.2374975681304932, |
| "learning_rate": 2.0115405300439093e-06, |
| "loss": 0.0267, |
| "step": 13710 |
| }, |
| { |
| "epoch": 5.9808195292066255, |
| "grad_norm": 0.49486619234085083, |
| "learning_rate": 1.9807053079114013e-06, |
| "loss": 0.0261, |
| "step": 13720 |
| }, |
| { |
| "epoch": 5.985178727114211, |
| "grad_norm": 0.4957039952278137, |
| "learning_rate": 1.9501034775919024e-06, |
| "loss": 0.0227, |
| "step": 13730 |
| }, |
| { |
| "epoch": 5.989537925021796, |
| "grad_norm": 0.4244577884674072, |
| "learning_rate": 1.9197351878217917e-06, |
| "loss": 0.0241, |
| "step": 13740 |
| }, |
| { |
| "epoch": 5.993897122929381, |
| "grad_norm": 0.19650620222091675, |
| "learning_rate": 1.8896005862023669e-06, |
| "loss": 0.027, |
| "step": 13750 |
| }, |
| { |
| "epoch": 5.998256320836966, |
| "grad_norm": 0.5099511742591858, |
| "learning_rate": 1.8596998191991288e-06, |
| "loss": 0.0466, |
| "step": 13760 |
| }, |
| { |
| "epoch": 6.002615518744551, |
| "grad_norm": 1.2363959550857544, |
| "learning_rate": 1.8300330321410208e-06, |
| "loss": 0.0273, |
| "step": 13770 |
| }, |
| { |
| "epoch": 6.006974716652136, |
| "grad_norm": 0.2866174280643463, |
| "learning_rate": 1.8006003692197794e-06, |
| "loss": 0.0265, |
| "step": 13780 |
| }, |
| { |
| "epoch": 6.011333914559721, |
| "grad_norm": 0.44128701090812683, |
| "learning_rate": 1.7714019734892062e-06, |
| "loss": 0.0314, |
| "step": 13790 |
| }, |
| { |
| "epoch": 6.015693112467306, |
| "grad_norm": 0.396158367395401, |
| "learning_rate": 1.7424379868644759e-06, |
| "loss": 0.0268, |
| "step": 13800 |
| }, |
| { |
| "epoch": 6.020052310374891, |
| "grad_norm": 0.38308459520339966, |
| "learning_rate": 1.71370855012144e-06, |
| "loss": 0.0317, |
| "step": 13810 |
| }, |
| { |
| "epoch": 6.024411508282476, |
| "grad_norm": 0.5613490343093872, |
| "learning_rate": 1.6852138028959574e-06, |
| "loss": 0.0299, |
| "step": 13820 |
| }, |
| { |
| "epoch": 6.028770706190061, |
| "grad_norm": 0.3245357573032379, |
| "learning_rate": 1.6569538836832044e-06, |
| "loss": 0.0282, |
| "step": 13830 |
| }, |
| { |
| "epoch": 6.033129904097646, |
| "grad_norm": 0.5527129173278809, |
| "learning_rate": 1.6289289298370147e-06, |
| "loss": 0.0309, |
| "step": 13840 |
| }, |
| { |
| "epoch": 6.037489102005231, |
| "grad_norm": 0.3975665867328644, |
| "learning_rate": 1.6011390775691748e-06, |
| "loss": 0.0221, |
| "step": 13850 |
| }, |
| { |
| "epoch": 6.041848299912816, |
| "grad_norm": 0.44122743606567383, |
| "learning_rate": 1.5735844619488238e-06, |
| "loss": 0.028, |
| "step": 13860 |
| }, |
| { |
| "epoch": 6.046207497820401, |
| "grad_norm": 0.2691154181957245, |
| "learning_rate": 1.5462652169017322e-06, |
| "loss": 0.0336, |
| "step": 13870 |
| }, |
| { |
| "epoch": 6.050566695727986, |
| "grad_norm": 1.3896781206130981, |
| "learning_rate": 1.5191814752097023e-06, |
| "loss": 0.0197, |
| "step": 13880 |
| }, |
| { |
| "epoch": 6.054925893635571, |
| "grad_norm": 0.4001099765300751, |
| "learning_rate": 1.492333368509896e-06, |
| "loss": 0.0355, |
| "step": 13890 |
| }, |
| { |
| "epoch": 6.059285091543156, |
| "grad_norm": 0.27171969413757324, |
| "learning_rate": 1.4657210272941923e-06, |
| "loss": 0.0202, |
| "step": 13900 |
| }, |
| { |
| "epoch": 6.063644289450741, |
| "grad_norm": 0.39832237362861633, |
| "learning_rate": 1.4393445809085748e-06, |
| "loss": 0.028, |
| "step": 13910 |
| }, |
| { |
| "epoch": 6.068003487358326, |
| "grad_norm": 0.7771658897399902, |
| "learning_rate": 1.4132041575524834e-06, |
| "loss": 0.0337, |
| "step": 13920 |
| }, |
| { |
| "epoch": 6.072362685265911, |
| "grad_norm": 1.9989441633224487, |
| "learning_rate": 1.387299884278187e-06, |
| "loss": 0.0294, |
| "step": 13930 |
| }, |
| { |
| "epoch": 6.076721883173496, |
| "grad_norm": 0.39735016226768494, |
| "learning_rate": 1.3616318869901945e-06, |
| "loss": 0.0291, |
| "step": 13940 |
| }, |
| { |
| "epoch": 6.081081081081081, |
| "grad_norm": 0.5451767444610596, |
| "learning_rate": 1.336200290444606e-06, |
| "loss": 0.0266, |
| "step": 13950 |
| }, |
| { |
| "epoch": 6.085440278988666, |
| "grad_norm": 0.18423081934452057, |
| "learning_rate": 1.3110052182485454e-06, |
| "loss": 0.0333, |
| "step": 13960 |
| }, |
| { |
| "epoch": 6.089799476896251, |
| "grad_norm": 0.427682489156723, |
| "learning_rate": 1.2860467928595298e-06, |
| "loss": 0.0489, |
| "step": 13970 |
| }, |
| { |
| "epoch": 6.094158674803836, |
| "grad_norm": 0.534563422203064, |
| "learning_rate": 1.2613251355848732e-06, |
| "loss": 0.0337, |
| "step": 13980 |
| }, |
| { |
| "epoch": 6.098517872711421, |
| "grad_norm": 0.5575711727142334, |
| "learning_rate": 1.2368403665811324e-06, |
| "loss": 0.0337, |
| "step": 13990 |
| }, |
| { |
| "epoch": 6.102877070619006, |
| "grad_norm": 0.2626562714576721, |
| "learning_rate": 1.2125926048534686e-06, |
| "loss": 0.0362, |
| "step": 14000 |
| }, |
| { |
| "epoch": 6.1072362685265915, |
| "grad_norm": 0.6961496472358704, |
| "learning_rate": 1.1885819682551259e-06, |
| "loss": 0.0253, |
| "step": 14010 |
| }, |
| { |
| "epoch": 6.111595466434176, |
| "grad_norm": 0.687218964099884, |
| "learning_rate": 1.164808573486814e-06, |
| "loss": 0.0343, |
| "step": 14020 |
| }, |
| { |
| "epoch": 6.1159546643417615, |
| "grad_norm": 0.5468307733535767, |
| "learning_rate": 1.1412725360961608e-06, |
| "loss": 0.0368, |
| "step": 14030 |
| }, |
| { |
| "epoch": 6.120313862249346, |
| "grad_norm": 0.6597806811332703, |
| "learning_rate": 1.1179739704771486e-06, |
| "loss": 0.021, |
| "step": 14040 |
| }, |
| { |
| "epoch": 6.1246730601569315, |
| "grad_norm": 0.30327489972114563, |
| "learning_rate": 1.0949129898695675e-06, |
| "loss": 0.0283, |
| "step": 14050 |
| }, |
| { |
| "epoch": 6.129032258064516, |
| "grad_norm": 0.4793466627597809, |
| "learning_rate": 1.0720897063584423e-06, |
| "loss": 0.0285, |
| "step": 14060 |
| }, |
| { |
| "epoch": 6.1333914559721014, |
| "grad_norm": 0.5061814785003662, |
| "learning_rate": 1.0495042308735103e-06, |
| "loss": 0.0324, |
| "step": 14070 |
| }, |
| { |
| "epoch": 6.137750653879686, |
| "grad_norm": 0.6382007598876953, |
| "learning_rate": 1.0271566731886617e-06, |
| "loss": 0.023, |
| "step": 14080 |
| }, |
| { |
| "epoch": 6.142109851787271, |
| "grad_norm": 0.7890705466270447, |
| "learning_rate": 1.005047141921428e-06, |
| "loss": 0.0271, |
| "step": 14090 |
| }, |
| { |
| "epoch": 6.146469049694856, |
| "grad_norm": 0.9838564991950989, |
| "learning_rate": 9.831757445324274e-07, |
| "loss": 0.0394, |
| "step": 14100 |
| }, |
| { |
| "epoch": 6.150828247602441, |
| "grad_norm": 0.3258971571922302, |
| "learning_rate": 9.615425873248761e-07, |
| "loss": 0.0271, |
| "step": 14110 |
| }, |
| { |
| "epoch": 6.155187445510026, |
| "grad_norm": 0.5593294501304626, |
| "learning_rate": 9.401477754440502e-07, |
| "loss": 0.0283, |
| "step": 14120 |
| }, |
| { |
| "epoch": 6.159546643417611, |
| "grad_norm": 0.4928247332572937, |
| "learning_rate": 9.189914128767684e-07, |
| "loss": 0.0366, |
| "step": 14130 |
| }, |
| { |
| "epoch": 6.163905841325196, |
| "grad_norm": 0.2662743031978607, |
| "learning_rate": 8.980736024508996e-07, |
| "loss": 0.0348, |
| "step": 14140 |
| }, |
| { |
| "epoch": 6.168265039232781, |
| "grad_norm": 0.6286730170249939, |
| "learning_rate": 8.77394445834867e-07, |
| "loss": 0.0237, |
| "step": 14150 |
| }, |
| { |
| "epoch": 6.172624237140366, |
| "grad_norm": 0.6010546088218689, |
| "learning_rate": 8.569540435371281e-07, |
| "loss": 0.0362, |
| "step": 14160 |
| }, |
| { |
| "epoch": 6.176983435047951, |
| "grad_norm": 0.32207077741622925, |
| "learning_rate": 8.367524949057348e-07, |
| "loss": 0.0282, |
| "step": 14170 |
| }, |
| { |
| "epoch": 6.181342632955536, |
| "grad_norm": 0.18507017195224762, |
| "learning_rate": 8.167898981277844e-07, |
| "loss": 0.0313, |
| "step": 14180 |
| }, |
| { |
| "epoch": 6.185701830863121, |
| "grad_norm": 0.5099960565567017, |
| "learning_rate": 7.970663502290143e-07, |
| "loss": 0.0289, |
| "step": 14190 |
| }, |
| { |
| "epoch": 6.190061028770706, |
| "grad_norm": 0.4452384114265442, |
| "learning_rate": 7.775819470732692e-07, |
| "loss": 0.0268, |
| "step": 14200 |
| }, |
| { |
| "epoch": 6.194420226678291, |
| "grad_norm": 0.2529633939266205, |
| "learning_rate": 7.583367833620681e-07, |
| "loss": 0.0317, |
| "step": 14210 |
| }, |
| { |
| "epoch": 6.198779424585876, |
| "grad_norm": 0.2038535624742508, |
| "learning_rate": 7.39330952634143e-07, |
| "loss": 0.032, |
| "step": 14220 |
| }, |
| { |
| "epoch": 6.203138622493461, |
| "grad_norm": 0.4185069799423218, |
| "learning_rate": 7.205645472649681e-07, |
| "loss": 0.0301, |
| "step": 14230 |
| }, |
| { |
| "epoch": 6.207497820401046, |
| "grad_norm": 0.357864111661911, |
| "learning_rate": 7.020376584663202e-07, |
| "loss": 0.0255, |
| "step": 14240 |
| }, |
| { |
| "epoch": 6.211857018308631, |
| "grad_norm": 0.4020203649997711, |
| "learning_rate": 6.83750376285841e-07, |
| "loss": 0.0389, |
| "step": 14250 |
| }, |
| { |
| "epoch": 6.216216216216216, |
| "grad_norm": 1.3740479946136475, |
| "learning_rate": 6.657027896065982e-07, |
| "loss": 0.0309, |
| "step": 14260 |
| }, |
| { |
| "epoch": 6.220575414123801, |
| "grad_norm": 0.3854996860027313, |
| "learning_rate": 6.478949861466355e-07, |
| "loss": 0.0392, |
| "step": 14270 |
| }, |
| { |
| "epoch": 6.224934612031387, |
| "grad_norm": 0.6102597117424011, |
| "learning_rate": 6.303270524585736e-07, |
| "loss": 0.0313, |
| "step": 14280 |
| }, |
| { |
| "epoch": 6.229293809938971, |
| "grad_norm": 0.25067058205604553, |
| "learning_rate": 6.129990739291713e-07, |
| "loss": 0.0242, |
| "step": 14290 |
| }, |
| { |
| "epoch": 6.233653007846557, |
| "grad_norm": 0.572243332862854, |
| "learning_rate": 5.959111347789093e-07, |
| "loss": 0.0353, |
| "step": 14300 |
| }, |
| { |
| "epoch": 6.238012205754141, |
| "grad_norm": 0.49448612332344055, |
| "learning_rate": 5.790633180615956e-07, |
| "loss": 0.0245, |
| "step": 14310 |
| }, |
| { |
| "epoch": 6.242371403661727, |
| "grad_norm": 0.22109507024288177, |
| "learning_rate": 5.624557056639446e-07, |
| "loss": 0.0411, |
| "step": 14320 |
| }, |
| { |
| "epoch": 6.246730601569311, |
| "grad_norm": 0.3474348485469818, |
| "learning_rate": 5.460883783051984e-07, |
| "loss": 0.0233, |
| "step": 14330 |
| }, |
| { |
| "epoch": 6.251089799476897, |
| "grad_norm": 0.18911020457744598, |
| "learning_rate": 5.299614155367171e-07, |
| "loss": 0.0285, |
| "step": 14340 |
| }, |
| { |
| "epoch": 6.255448997384481, |
| "grad_norm": 0.5895888209342957, |
| "learning_rate": 5.140748957415897e-07, |
| "loss": 0.0292, |
| "step": 14350 |
| }, |
| { |
| "epoch": 6.259808195292067, |
| "grad_norm": 0.19754233956336975, |
| "learning_rate": 4.984288961342787e-07, |
| "loss": 0.0242, |
| "step": 14360 |
| }, |
| { |
| "epoch": 6.264167393199651, |
| "grad_norm": 0.3680201470851898, |
| "learning_rate": 4.830234927602206e-07, |
| "loss": 0.0283, |
| "step": 14370 |
| }, |
| { |
| "epoch": 6.2685265911072365, |
| "grad_norm": 0.4519497752189636, |
| "learning_rate": 4.6785876049545986e-07, |
| "loss": 0.0299, |
| "step": 14380 |
| }, |
| { |
| "epoch": 6.272885789014821, |
| "grad_norm": 0.3829212486743927, |
| "learning_rate": 4.5293477304629297e-07, |
| "loss": 0.0373, |
| "step": 14390 |
| }, |
| { |
| "epoch": 6.2772449869224065, |
| "grad_norm": 0.27076077461242676, |
| "learning_rate": 4.382516029489081e-07, |
| "loss": 0.0338, |
| "step": 14400 |
| }, |
| { |
| "epoch": 6.281604184829991, |
| "grad_norm": 0.32891130447387695, |
| "learning_rate": 4.2380932156902975e-07, |
| "loss": 0.0263, |
| "step": 14410 |
| }, |
| { |
| "epoch": 6.2859633827375765, |
| "grad_norm": 0.380852609872818, |
| "learning_rate": 4.0960799910156335e-07, |
| "loss": 0.0314, |
| "step": 14420 |
| }, |
| { |
| "epoch": 6.290322580645161, |
| "grad_norm": 0.39561882615089417, |
| "learning_rate": 3.956477045702844e-07, |
| "loss": 0.0313, |
| "step": 14430 |
| }, |
| { |
| "epoch": 6.2946817785527465, |
| "grad_norm": 0.40406253933906555, |
| "learning_rate": 3.819285058274613e-07, |
| "loss": 0.0281, |
| "step": 14440 |
| }, |
| { |
| "epoch": 6.299040976460331, |
| "grad_norm": 0.4073195159435272, |
| "learning_rate": 3.684504695535496e-07, |
| "loss": 0.0237, |
| "step": 14450 |
| }, |
| { |
| "epoch": 6.303400174367916, |
| "grad_norm": 1.0025192499160767, |
| "learning_rate": 3.552136612568813e-07, |
| "loss": 0.03, |
| "step": 14460 |
| }, |
| { |
| "epoch": 6.307759372275501, |
| "grad_norm": 0.7018080353736877, |
| "learning_rate": 3.422181452733042e-07, |
| "loss": 0.0352, |
| "step": 14470 |
| }, |
| { |
| "epoch": 6.312118570183086, |
| "grad_norm": 0.6284013390541077, |
| "learning_rate": 3.294639847659209e-07, |
| "loss": 0.0246, |
| "step": 14480 |
| }, |
| { |
| "epoch": 6.316477768090671, |
| "grad_norm": 0.3403724431991577, |
| "learning_rate": 3.169512417247389e-07, |
| "loss": 0.0259, |
| "step": 14490 |
| }, |
| { |
| "epoch": 6.320836965998256, |
| "grad_norm": 0.7513018250465393, |
| "learning_rate": 3.046799769663822e-07, |
| "loss": 0.0327, |
| "step": 14500 |
| }, |
| { |
| "epoch": 6.325196163905841, |
| "grad_norm": 0.42642244696617126, |
| "learning_rate": 2.926502501338191e-07, |
| "loss": 0.0301, |
| "step": 14510 |
| }, |
| { |
| "epoch": 6.329555361813426, |
| "grad_norm": 0.184515580534935, |
| "learning_rate": 2.808621196960404e-07, |
| "loss": 0.0234, |
| "step": 14520 |
| }, |
| { |
| "epoch": 6.333914559721011, |
| "grad_norm": 0.4309402108192444, |
| "learning_rate": 2.6931564294778164e-07, |
| "loss": 0.0478, |
| "step": 14530 |
| }, |
| { |
| "epoch": 6.338273757628596, |
| "grad_norm": 0.1840837150812149, |
| "learning_rate": 2.58010876009257e-07, |
| "loss": 0.0282, |
| "step": 14540 |
| }, |
| { |
| "epoch": 6.342632955536182, |
| "grad_norm": 0.797314465045929, |
| "learning_rate": 2.4694787382589237e-07, |
| "loss": 0.0311, |
| "step": 14550 |
| }, |
| { |
| "epoch": 6.346992153443766, |
| "grad_norm": 0.5150383114814758, |
| "learning_rate": 2.3612669016802592e-07, |
| "loss": 0.0269, |
| "step": 14560 |
| }, |
| { |
| "epoch": 6.351351351351352, |
| "grad_norm": 0.3189009428024292, |
| "learning_rate": 2.2554737763068045e-07, |
| "loss": 0.022, |
| "step": 14570 |
| }, |
| { |
| "epoch": 6.355710549258936, |
| "grad_norm": 0.34953877329826355, |
| "learning_rate": 2.152099876332858e-07, |
| "loss": 0.0328, |
| "step": 14580 |
| }, |
| { |
| "epoch": 6.360069747166522, |
| "grad_norm": 0.204021617770195, |
| "learning_rate": 2.051145704194457e-07, |
| "loss": 0.032, |
| "step": 14590 |
| }, |
| { |
| "epoch": 6.364428945074106, |
| "grad_norm": 1.0681053400039673, |
| "learning_rate": 1.9526117505667129e-07, |
| "loss": 0.0345, |
| "step": 14600 |
| }, |
| { |
| "epoch": 6.368788142981692, |
| "grad_norm": 0.1900322437286377, |
| "learning_rate": 1.856498494361758e-07, |
| "loss": 0.0279, |
| "step": 14610 |
| }, |
| { |
| "epoch": 6.373147340889276, |
| "grad_norm": 0.967194676399231, |
| "learning_rate": 1.7628064027260803e-07, |
| "loss": 0.0269, |
| "step": 14620 |
| }, |
| { |
| "epoch": 6.377506538796862, |
| "grad_norm": 0.4180316925048828, |
| "learning_rate": 1.671535931038415e-07, |
| "loss": 0.0247, |
| "step": 14630 |
| }, |
| { |
| "epoch": 6.381865736704446, |
| "grad_norm": 0.31126174330711365, |
| "learning_rate": 1.5826875229076333e-07, |
| "loss": 0.0256, |
| "step": 14640 |
| }, |
| { |
| "epoch": 6.386224934612032, |
| "grad_norm": 0.33545228838920593, |
| "learning_rate": 1.496261610170302e-07, |
| "loss": 0.0283, |
| "step": 14650 |
| }, |
| { |
| "epoch": 6.390584132519616, |
| "grad_norm": 0.6360080242156982, |
| "learning_rate": 1.4122586128888503e-07, |
| "loss": 0.0274, |
| "step": 14660 |
| }, |
| { |
| "epoch": 6.394943330427202, |
| "grad_norm": 0.288099467754364, |
| "learning_rate": 1.3306789393494612e-07, |
| "loss": 0.0368, |
| "step": 14670 |
| }, |
| { |
| "epoch": 6.399302528334786, |
| "grad_norm": 1.516507863998413, |
| "learning_rate": 1.2515229860599054e-07, |
| "loss": 0.028, |
| "step": 14680 |
| }, |
| { |
| "epoch": 6.403661726242372, |
| "grad_norm": 0.37025946378707886, |
| "learning_rate": 1.1747911377478771e-07, |
| "loss": 0.0281, |
| "step": 14690 |
| }, |
| { |
| "epoch": 6.408020924149956, |
| "grad_norm": 0.33924514055252075, |
| "learning_rate": 1.1004837673589952e-07, |
| "loss": 0.0283, |
| "step": 14700 |
| }, |
| { |
| "epoch": 6.412380122057542, |
| "grad_norm": 0.3753373622894287, |
| "learning_rate": 1.0286012360550267e-07, |
| "loss": 0.0343, |
| "step": 14710 |
| }, |
| { |
| "epoch": 6.416739319965126, |
| "grad_norm": 0.509145975112915, |
| "learning_rate": 9.591438932121111e-08, |
| "loss": 0.0344, |
| "step": 14720 |
| }, |
| { |
| "epoch": 6.421098517872712, |
| "grad_norm": 1.0932096242904663, |
| "learning_rate": 8.921120764189272e-08, |
| "loss": 0.0352, |
| "step": 14730 |
| }, |
| { |
| "epoch": 6.425457715780296, |
| "grad_norm": 1.1470388174057007, |
| "learning_rate": 8.275061114753068e-08, |
| "loss": 0.0353, |
| "step": 14740 |
| }, |
| { |
| "epoch": 6.4298169136878816, |
| "grad_norm": 0.9916203022003174, |
| "learning_rate": 7.65326312390624e-08, |
| "loss": 0.0385, |
| "step": 14750 |
| }, |
| { |
| "epoch": 6.434176111595466, |
| "grad_norm": 0.6590924263000488, |
| "learning_rate": 7.055729813819079e-08, |
| "loss": 0.0306, |
| "step": 14760 |
| }, |
| { |
| "epoch": 6.4385353095030515, |
| "grad_norm": 0.45894575119018555, |
| "learning_rate": 6.48246408872899e-08, |
| "loss": 0.0266, |
| "step": 14770 |
| }, |
| { |
| "epoch": 6.442894507410636, |
| "grad_norm": 0.3868933618068695, |
| "learning_rate": 5.9334687349227314e-08, |
| "loss": 0.0286, |
| "step": 14780 |
| }, |
| { |
| "epoch": 6.4472537053182215, |
| "grad_norm": 0.4164086878299713, |
| "learning_rate": 5.4087464207236426e-08, |
| "loss": 0.0203, |
| "step": 14790 |
| }, |
| { |
| "epoch": 6.451612903225806, |
| "grad_norm": 0.7725594639778137, |
| "learning_rate": 4.9082996964794345e-08, |
| "loss": 0.0282, |
| "step": 14800 |
| }, |
| { |
| "epoch": 6.4559721011333915, |
| "grad_norm": 0.35307568311691284, |
| "learning_rate": 4.432130994548866e-08, |
| "loss": 0.0228, |
| "step": 14810 |
| }, |
| { |
| "epoch": 6.460331299040977, |
| "grad_norm": 0.4140183925628662, |
| "learning_rate": 3.980242629291198e-08, |
| "loss": 0.0265, |
| "step": 14820 |
| }, |
| { |
| "epoch": 6.4646904969485615, |
| "grad_norm": 0.37078022956848145, |
| "learning_rate": 3.5526367970539765e-08, |
| "loss": 0.0329, |
| "step": 14830 |
| }, |
| { |
| "epoch": 6.469049694856146, |
| "grad_norm": 0.6152135729789734, |
| "learning_rate": 3.1493155761613826e-08, |
| "loss": 0.04, |
| "step": 14840 |
| }, |
| { |
| "epoch": 6.473408892763731, |
| "grad_norm": 0.7362766861915588, |
| "learning_rate": 2.7702809269058992e-08, |
| "loss": 0.0417, |
| "step": 14850 |
| }, |
| { |
| "epoch": 6.477768090671317, |
| "grad_norm": 1.0350167751312256, |
| "learning_rate": 2.4155346915394337e-08, |
| "loss": 0.0281, |
| "step": 14860 |
| }, |
| { |
| "epoch": 6.482127288578901, |
| "grad_norm": 0.35365110635757446, |
| "learning_rate": 2.085078594261103e-08, |
| "loss": 0.0276, |
| "step": 14870 |
| }, |
| { |
| "epoch": 6.486486486486487, |
| "grad_norm": 0.7158024311065674, |
| "learning_rate": 1.7789142412122372e-08, |
| "loss": 0.0341, |
| "step": 14880 |
| }, |
| { |
| "epoch": 6.490845684394071, |
| "grad_norm": 1.3772259950637817, |
| "learning_rate": 1.4970431204663905e-08, |
| "loss": 0.0292, |
| "step": 14890 |
| }, |
| { |
| "epoch": 6.495204882301657, |
| "grad_norm": 0.4312567710876465, |
| "learning_rate": 1.2394666020226764e-08, |
| "loss": 0.0237, |
| "step": 14900 |
| }, |
| { |
| "epoch": 6.499564080209241, |
| "grad_norm": 0.9634395837783813, |
| "learning_rate": 1.0061859378007743e-08, |
| "loss": 0.0325, |
| "step": 14910 |
| }, |
| { |
| "epoch": 6.503923278116827, |
| "grad_norm": 0.502382755279541, |
| "learning_rate": 7.97202261630936e-09, |
| "loss": 0.0192, |
| "step": 14920 |
| }, |
| { |
| "epoch": 6.508282476024411, |
| "grad_norm": 0.6565430164337158, |
| "learning_rate": 6.125165892539863e-09, |
| "loss": 0.0319, |
| "step": 14930 |
| }, |
| { |
| "epoch": 6.512641673931997, |
| "grad_norm": 0.23635677993297577, |
| "learning_rate": 4.5212981831022076e-09, |
| "loss": 0.0391, |
| "step": 14940 |
| }, |
| { |
| "epoch": 6.517000871839581, |
| "grad_norm": 0.5301797389984131, |
| "learning_rate": 3.1604272834051542e-09, |
| "loss": 0.0402, |
| "step": 14950 |
| }, |
| { |
| "epoch": 6.521360069747167, |
| "grad_norm": 0.11095762252807617, |
| "learning_rate": 2.04255980778556e-09, |
| "loss": 0.0284, |
| "step": 14960 |
| }, |
| { |
| "epoch": 6.525719267654751, |
| "grad_norm": 0.40208858251571655, |
| "learning_rate": 1.1677011895028234e-09, |
| "loss": 0.0279, |
| "step": 14970 |
| }, |
| { |
| "epoch": 6.530078465562337, |
| "grad_norm": 0.3951982259750366, |
| "learning_rate": 5.358556807000259e-10, |
| "loss": 0.0272, |
| "step": 14980 |
| }, |
| { |
| "epoch": 6.534437663469921, |
| "grad_norm": 0.6511605381965637, |
| "learning_rate": 1.4702635238728058e-10, |
| "loss": 0.0299, |
| "step": 14990 |
| }, |
| { |
| "epoch": 6.538796861377507, |
| "grad_norm": 0.603900134563446, |
| "learning_rate": 1.2150944139754927e-12, |
| "loss": 0.0342, |
| "step": 15000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 15000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 7, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|