| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 18.21668264621285, |
| "eval_steps": 500, |
| "global_step": 19000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009587727708533078, |
| "grad_norm": 27.445323944091797, |
| "learning_rate": 9.473684210526317e-07, |
| "loss": 2.1709, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.019175455417066157, |
| "grad_norm": 19.005075454711914, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.8704, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.028763183125599234, |
| "grad_norm": 14.785849571228027, |
| "learning_rate": 3.0526315789473684e-06, |
| "loss": 1.6318, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.038350910834132314, |
| "grad_norm": 4.634030342102051, |
| "learning_rate": 4.105263157894737e-06, |
| "loss": 0.8641, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04793863854266539, |
| "grad_norm": 2.2945172786712646, |
| "learning_rate": 5.1578947368421055e-06, |
| "loss": 0.5394, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05752636625119847, |
| "grad_norm": 1.7087739706039429, |
| "learning_rate": 6.2105263157894745e-06, |
| "loss": 0.4525, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.06711409395973154, |
| "grad_norm": 1.1094379425048828, |
| "learning_rate": 7.2631578947368426e-06, |
| "loss": 0.3063, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.07670182166826463, |
| "grad_norm": 1.5301676988601685, |
| "learning_rate": 8.315789473684212e-06, |
| "loss": 0.3153, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0862895493767977, |
| "grad_norm": 1.1719224452972412, |
| "learning_rate": 9.368421052631579e-06, |
| "loss": 0.2466, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.09587727708533078, |
| "grad_norm": 1.751291275024414, |
| "learning_rate": 1.0421052631578948e-05, |
| "loss": 0.27, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.10546500479386385, |
| "grad_norm": 1.0524818897247314, |
| "learning_rate": 1.1473684210526315e-05, |
| "loss": 0.2333, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.11505273250239693, |
| "grad_norm": 1.508988857269287, |
| "learning_rate": 1.2526315789473686e-05, |
| "loss": 0.2399, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.12464046021093, |
| "grad_norm": 1.3286081552505493, |
| "learning_rate": 1.3578947368421053e-05, |
| "loss": 0.1962, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1342281879194631, |
| "grad_norm": 1.7412567138671875, |
| "learning_rate": 1.4631578947368422e-05, |
| "loss": 0.2004, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.14381591562799617, |
| "grad_norm": 1.8567883968353271, |
| "learning_rate": 1.568421052631579e-05, |
| "loss": 0.174, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.15340364333652926, |
| "grad_norm": 1.5139102935791016, |
| "learning_rate": 1.673684210526316e-05, |
| "loss": 0.1765, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1629913710450623, |
| "grad_norm": 1.6859902143478394, |
| "learning_rate": 1.7789473684210527e-05, |
| "loss": 0.168, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1725790987535954, |
| "grad_norm": 1.8252370357513428, |
| "learning_rate": 1.8842105263157894e-05, |
| "loss": 0.1645, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.18216682646212848, |
| "grad_norm": 1.2732850313186646, |
| "learning_rate": 1.9894736842105265e-05, |
| "loss": 0.1554, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.19175455417066156, |
| "grad_norm": 1.0456390380859375, |
| "learning_rate": 2.0947368421052632e-05, |
| "loss": 0.1575, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.20134228187919462, |
| "grad_norm": 0.7651330828666687, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 0.163, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2109300095877277, |
| "grad_norm": 0.9984806776046753, |
| "learning_rate": 2.305263157894737e-05, |
| "loss": 0.1508, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.22051773729626079, |
| "grad_norm": 1.0750813484191895, |
| "learning_rate": 2.410526315789474e-05, |
| "loss": 0.1349, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.23010546500479387, |
| "grad_norm": 1.7777466773986816, |
| "learning_rate": 2.5157894736842108e-05, |
| "loss": 0.1448, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.23969319271332695, |
| "grad_norm": 1.3516716957092285, |
| "learning_rate": 2.6210526315789475e-05, |
| "loss": 0.1427, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.24928092042186, |
| "grad_norm": 1.1810095310211182, |
| "learning_rate": 2.7263157894736846e-05, |
| "loss": 0.1385, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2588686481303931, |
| "grad_norm": 1.6512832641601562, |
| "learning_rate": 2.8315789473684213e-05, |
| "loss": 0.155, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2684563758389262, |
| "grad_norm": 1.2209525108337402, |
| "learning_rate": 2.9368421052631577e-05, |
| "loss": 0.1378, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.27804410354745923, |
| "grad_norm": 1.0236748456954956, |
| "learning_rate": 3.042105263157895e-05, |
| "loss": 0.1409, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.28763183125599234, |
| "grad_norm": 1.065836787223816, |
| "learning_rate": 3.147368421052632e-05, |
| "loss": 0.1409, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.2972195589645254, |
| "grad_norm": 1.0454283952713013, |
| "learning_rate": 3.2526315789473686e-05, |
| "loss": 0.1333, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3068072866730585, |
| "grad_norm": 0.5515532493591309, |
| "learning_rate": 3.357894736842105e-05, |
| "loss": 0.1137, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.31639501438159157, |
| "grad_norm": 1.323104977607727, |
| "learning_rate": 3.463157894736842e-05, |
| "loss": 0.1317, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3259827420901246, |
| "grad_norm": 1.5426658391952515, |
| "learning_rate": 3.5684210526315794e-05, |
| "loss": 0.1174, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.33557046979865773, |
| "grad_norm": 0.9131991863250732, |
| "learning_rate": 3.673684210526316e-05, |
| "loss": 0.1171, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3451581975071908, |
| "grad_norm": 1.0024508237838745, |
| "learning_rate": 3.778947368421053e-05, |
| "loss": 0.1162, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3547459252157239, |
| "grad_norm": 1.1091963052749634, |
| "learning_rate": 3.8842105263157896e-05, |
| "loss": 0.1272, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.36433365292425696, |
| "grad_norm": 0.9772627949714661, |
| "learning_rate": 3.989473684210526e-05, |
| "loss": 0.1059, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.37392138063279, |
| "grad_norm": 0.92393958568573, |
| "learning_rate": 4.094736842105264e-05, |
| "loss": 0.113, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3835091083413231, |
| "grad_norm": 0.9960997700691223, |
| "learning_rate": 4.2e-05, |
| "loss": 0.1077, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.3930968360498562, |
| "grad_norm": 1.0618188381195068, |
| "learning_rate": 4.305263157894737e-05, |
| "loss": 0.1084, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.40268456375838924, |
| "grad_norm": 0.7491030693054199, |
| "learning_rate": 4.410526315789474e-05, |
| "loss": 0.1021, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.41227229146692235, |
| "grad_norm": 0.9327500462532043, |
| "learning_rate": 4.515789473684211e-05, |
| "loss": 0.0984, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4218600191754554, |
| "grad_norm": 0.7720574140548706, |
| "learning_rate": 4.6210526315789473e-05, |
| "loss": 0.0971, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.4314477468839885, |
| "grad_norm": 1.2057392597198486, |
| "learning_rate": 4.726315789473684e-05, |
| "loss": 0.1088, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.44103547459252157, |
| "grad_norm": 1.1223393678665161, |
| "learning_rate": 4.8315789473684215e-05, |
| "loss": 0.0992, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4506232023010546, |
| "grad_norm": 0.6742480397224426, |
| "learning_rate": 4.936842105263158e-05, |
| "loss": 0.0963, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.46021093000958774, |
| "grad_norm": 1.0714161396026611, |
| "learning_rate": 5.042105263157895e-05, |
| "loss": 0.0974, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.4697986577181208, |
| "grad_norm": 0.7936097383499146, |
| "learning_rate": 5.1473684210526317e-05, |
| "loss": 0.1022, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4793863854266539, |
| "grad_norm": 1.4822968244552612, |
| "learning_rate": 5.252631578947369e-05, |
| "loss": 0.0996, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.48897411313518696, |
| "grad_norm": 1.0476019382476807, |
| "learning_rate": 5.357894736842105e-05, |
| "loss": 0.1018, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.49856184084372, |
| "grad_norm": 0.9343310594558716, |
| "learning_rate": 5.4631578947368425e-05, |
| "loss": 0.102, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5081495685522531, |
| "grad_norm": 0.8918314576148987, |
| "learning_rate": 5.568421052631579e-05, |
| "loss": 0.0986, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5177372962607862, |
| "grad_norm": 1.155029296875, |
| "learning_rate": 5.6736842105263166e-05, |
| "loss": 0.1031, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5273250239693192, |
| "grad_norm": 0.625169038772583, |
| "learning_rate": 5.778947368421053e-05, |
| "loss": 0.0907, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5369127516778524, |
| "grad_norm": 1.0989243984222412, |
| "learning_rate": 5.88421052631579e-05, |
| "loss": 0.0843, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5465004793863855, |
| "grad_norm": 0.8834158778190613, |
| "learning_rate": 5.989473684210527e-05, |
| "loss": 0.0777, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5560882070949185, |
| "grad_norm": 0.7638639211654663, |
| "learning_rate": 6.094736842105263e-05, |
| "loss": 0.0781, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.5656759348034516, |
| "grad_norm": 1.2423137426376343, |
| "learning_rate": 6.2e-05, |
| "loss": 0.0886, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5752636625119847, |
| "grad_norm": 1.082046627998352, |
| "learning_rate": 6.305263157894738e-05, |
| "loss": 0.0921, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5848513902205177, |
| "grad_norm": 0.8878996968269348, |
| "learning_rate": 6.410526315789474e-05, |
| "loss": 0.0926, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5944391179290508, |
| "grad_norm": 0.80406653881073, |
| "learning_rate": 6.515789473684211e-05, |
| "loss": 0.0983, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6040268456375839, |
| "grad_norm": 0.8726837038993835, |
| "learning_rate": 6.621052631578947e-05, |
| "loss": 0.0833, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.613614573346117, |
| "grad_norm": 0.9104009866714478, |
| "learning_rate": 6.726315789473685e-05, |
| "loss": 0.0884, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.62320230105465, |
| "grad_norm": 0.6089403629302979, |
| "learning_rate": 6.83157894736842e-05, |
| "loss": 0.0835, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6327900287631831, |
| "grad_norm": 0.8488327860832214, |
| "learning_rate": 6.936842105263158e-05, |
| "loss": 0.0812, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.6423777564717162, |
| "grad_norm": 1.121718168258667, |
| "learning_rate": 7.042105263157895e-05, |
| "loss": 0.0979, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.6519654841802492, |
| "grad_norm": 0.554762065410614, |
| "learning_rate": 7.147368421052631e-05, |
| "loss": 0.0941, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.6615532118887824, |
| "grad_norm": 0.8173949718475342, |
| "learning_rate": 7.252631578947369e-05, |
| "loss": 0.09, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.6711409395973155, |
| "grad_norm": 0.9960802793502808, |
| "learning_rate": 7.357894736842106e-05, |
| "loss": 0.0969, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6807286673058485, |
| "grad_norm": 0.9952852725982666, |
| "learning_rate": 7.463157894736844e-05, |
| "loss": 0.0927, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.6903163950143816, |
| "grad_norm": 1.1024588346481323, |
| "learning_rate": 7.56842105263158e-05, |
| "loss": 0.0874, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.6999041227229147, |
| "grad_norm": 0.7529568672180176, |
| "learning_rate": 7.673684210526316e-05, |
| "loss": 0.0853, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7094918504314478, |
| "grad_norm": 0.8373092412948608, |
| "learning_rate": 7.778947368421053e-05, |
| "loss": 0.0783, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7190795781399808, |
| "grad_norm": 0.6158662438392639, |
| "learning_rate": 7.884210526315789e-05, |
| "loss": 0.0872, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7286673058485139, |
| "grad_norm": 0.7315576076507568, |
| "learning_rate": 7.989473684210527e-05, |
| "loss": 0.0841, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.738255033557047, |
| "grad_norm": 0.5791612267494202, |
| "learning_rate": 8.094736842105264e-05, |
| "loss": 0.0706, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.74784276126558, |
| "grad_norm": 0.8657413721084595, |
| "learning_rate": 8.2e-05, |
| "loss": 0.0689, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.7574304889741131, |
| "grad_norm": 0.9742875695228577, |
| "learning_rate": 8.305263157894737e-05, |
| "loss": 0.0869, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.7670182166826462, |
| "grad_norm": 0.7406681776046753, |
| "learning_rate": 8.410526315789475e-05, |
| "loss": 0.0869, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7766059443911792, |
| "grad_norm": 1.168278455734253, |
| "learning_rate": 8.515789473684211e-05, |
| "loss": 0.0803, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.7861936720997124, |
| "grad_norm": 1.1049866676330566, |
| "learning_rate": 8.621052631578947e-05, |
| "loss": 0.0851, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.7957813998082455, |
| "grad_norm": 0.9790105223655701, |
| "learning_rate": 8.726315789473684e-05, |
| "loss": 0.0788, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8053691275167785, |
| "grad_norm": 0.762137770652771, |
| "learning_rate": 8.831578947368422e-05, |
| "loss": 0.0715, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.8149568552253116, |
| "grad_norm": 0.8730412125587463, |
| "learning_rate": 8.936842105263158e-05, |
| "loss": 0.0898, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8245445829338447, |
| "grad_norm": 1.1794781684875488, |
| "learning_rate": 9.042105263157895e-05, |
| "loss": 0.0798, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.8341323106423778, |
| "grad_norm": 0.7828540205955505, |
| "learning_rate": 9.147368421052633e-05, |
| "loss": 0.0848, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.8437200383509108, |
| "grad_norm": 0.7496788501739502, |
| "learning_rate": 9.252631578947369e-05, |
| "loss": 0.0836, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.8533077660594439, |
| "grad_norm": 0.7298113703727722, |
| "learning_rate": 9.357894736842106e-05, |
| "loss": 0.0804, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.862895493767977, |
| "grad_norm": 0.7915740609169006, |
| "learning_rate": 9.463157894736842e-05, |
| "loss": 0.0978, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.87248322147651, |
| "grad_norm": 0.6587068438529968, |
| "learning_rate": 9.56842105263158e-05, |
| "loss": 0.0823, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.8820709491850431, |
| "grad_norm": 0.6733153462409973, |
| "learning_rate": 9.673684210526316e-05, |
| "loss": 0.0903, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.8916586768935763, |
| "grad_norm": 0.8253368139266968, |
| "learning_rate": 9.778947368421053e-05, |
| "loss": 0.0817, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.9012464046021093, |
| "grad_norm": 0.631831169128418, |
| "learning_rate": 9.88421052631579e-05, |
| "loss": 0.0692, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.9108341323106424, |
| "grad_norm": 0.4998478293418884, |
| "learning_rate": 9.989473684210526e-05, |
| "loss": 0.08, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.9204218600191755, |
| "grad_norm": 0.5345643162727356, |
| "learning_rate": 9.999993865625701e-05, |
| "loss": 0.0707, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.9300095877277086, |
| "grad_norm": 0.496713250875473, |
| "learning_rate": 9.999972660400536e-05, |
| "loss": 0.0759, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.9395973154362416, |
| "grad_norm": 0.4693014621734619, |
| "learning_rate": 9.999936308655709e-05, |
| "loss": 0.0781, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.9491850431447747, |
| "grad_norm": 0.5775050520896912, |
| "learning_rate": 9.999884810501344e-05, |
| "loss": 0.0748, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.9587727708533078, |
| "grad_norm": 0.7837674021720886, |
| "learning_rate": 9.999818166093444e-05, |
| "loss": 0.0783, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9683604985618408, |
| "grad_norm": 0.6740615367889404, |
| "learning_rate": 9.999736375633896e-05, |
| "loss": 0.0799, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.9779482262703739, |
| "grad_norm": 0.644281268119812, |
| "learning_rate": 9.999639439370469e-05, |
| "loss": 0.0875, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.987535953978907, |
| "grad_norm": 0.6877675652503967, |
| "learning_rate": 9.999527357596816e-05, |
| "loss": 0.0702, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.99712368168744, |
| "grad_norm": 0.8206673860549927, |
| "learning_rate": 9.999400130652465e-05, |
| "loss": 0.0705, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.0067114093959733, |
| "grad_norm": 0.5425058007240295, |
| "learning_rate": 9.999257758922833e-05, |
| "loss": 0.0773, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.0162991371045063, |
| "grad_norm": 0.7658944725990295, |
| "learning_rate": 9.999100242839203e-05, |
| "loss": 0.0777, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.0258868648130393, |
| "grad_norm": 0.73934006690979, |
| "learning_rate": 9.998927582878747e-05, |
| "loss": 0.0685, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.0354745925215725, |
| "grad_norm": 0.38501349091529846, |
| "learning_rate": 9.998739779564506e-05, |
| "loss": 0.069, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.0450623202301055, |
| "grad_norm": 0.45449578762054443, |
| "learning_rate": 9.998536833465394e-05, |
| "loss": 0.0559, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.0546500479386385, |
| "grad_norm": 0.8127736449241638, |
| "learning_rate": 9.998318745196203e-05, |
| "loss": 0.068, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.0642377756471717, |
| "grad_norm": 0.6800121068954468, |
| "learning_rate": 9.998085515417588e-05, |
| "loss": 0.0683, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.0738255033557047, |
| "grad_norm": 0.688755214214325, |
| "learning_rate": 9.997837144836082e-05, |
| "loss": 0.0619, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.0834132310642377, |
| "grad_norm": 0.6529737710952759, |
| "learning_rate": 9.997573634204074e-05, |
| "loss": 0.0716, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.093000958772771, |
| "grad_norm": 0.773915708065033, |
| "learning_rate": 9.997294984319827e-05, |
| "loss": 0.0667, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.102588686481304, |
| "grad_norm": 0.611422061920166, |
| "learning_rate": 9.997001196027457e-05, |
| "loss": 0.0695, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.112176414189837, |
| "grad_norm": 0.6238502264022827, |
| "learning_rate": 9.996692270216947e-05, |
| "loss": 0.0632, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.1217641418983701, |
| "grad_norm": 0.6252961158752441, |
| "learning_rate": 9.996368207824128e-05, |
| "loss": 0.0708, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.1313518696069031, |
| "grad_norm": 0.3486538529396057, |
| "learning_rate": 9.996029009830689e-05, |
| "loss": 0.0662, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.1409395973154361, |
| "grad_norm": 0.40418991446495056, |
| "learning_rate": 9.995674677264173e-05, |
| "loss": 0.0591, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.1505273250239694, |
| "grad_norm": 0.4740557074546814, |
| "learning_rate": 9.995305211197965e-05, |
| "loss": 0.0701, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.1601150527325024, |
| "grad_norm": 0.713366687297821, |
| "learning_rate": 9.994920612751295e-05, |
| "loss": 0.073, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.1697027804410354, |
| "grad_norm": 0.6612546443939209, |
| "learning_rate": 9.994520883089238e-05, |
| "loss": 0.0681, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.1792905081495686, |
| "grad_norm": 0.6933987736701965, |
| "learning_rate": 9.994106023422699e-05, |
| "loss": 0.0655, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.1888782358581016, |
| "grad_norm": 0.4890410602092743, |
| "learning_rate": 9.993676035008423e-05, |
| "loss": 0.0633, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.1984659635666346, |
| "grad_norm": 0.5587823987007141, |
| "learning_rate": 9.993230919148985e-05, |
| "loss": 0.0656, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.2080536912751678, |
| "grad_norm": 0.6635778546333313, |
| "learning_rate": 9.99277067719278e-05, |
| "loss": 0.0603, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.2176414189837008, |
| "grad_norm": 0.6514385342597961, |
| "learning_rate": 9.99229531053403e-05, |
| "loss": 0.0652, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.2272291466922338, |
| "grad_norm": 0.5782362818717957, |
| "learning_rate": 9.991804820612773e-05, |
| "loss": 0.0644, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.236816874400767, |
| "grad_norm": 0.39845097064971924, |
| "learning_rate": 9.99129920891486e-05, |
| "loss": 0.0617, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.2464046021093, |
| "grad_norm": 0.5628125667572021, |
| "learning_rate": 9.990778476971951e-05, |
| "loss": 0.0613, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.255992329817833, |
| "grad_norm": 0.4811013340950012, |
| "learning_rate": 9.99024262636151e-05, |
| "loss": 0.0644, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.2655800575263663, |
| "grad_norm": 0.540348470211029, |
| "learning_rate": 9.989691658706798e-05, |
| "loss": 0.063, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.2751677852348993, |
| "grad_norm": 0.593609631061554, |
| "learning_rate": 9.989125575676876e-05, |
| "loss": 0.0537, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.2847555129434325, |
| "grad_norm": 0.4400087296962738, |
| "learning_rate": 9.988544378986591e-05, |
| "loss": 0.0634, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.2943432406519655, |
| "grad_norm": 0.7038517594337463, |
| "learning_rate": 9.987948070396571e-05, |
| "loss": 0.0564, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.3039309683604985, |
| "grad_norm": 0.4805976450443268, |
| "learning_rate": 9.987336651713229e-05, |
| "loss": 0.0604, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.3135186960690317, |
| "grad_norm": 0.5478856563568115, |
| "learning_rate": 9.986710124788745e-05, |
| "loss": 0.0573, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.3231064237775647, |
| "grad_norm": 0.6592814922332764, |
| "learning_rate": 9.986068491521072e-05, |
| "loss": 0.0604, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.332694151486098, |
| "grad_norm": 0.7848181128501892, |
| "learning_rate": 9.985411753853921e-05, |
| "loss": 0.055, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.342281879194631, |
| "grad_norm": 0.40262654423713684, |
| "learning_rate": 9.984739913776765e-05, |
| "loss": 0.0629, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.351869606903164, |
| "grad_norm": 0.6241422295570374, |
| "learning_rate": 9.984052973324817e-05, |
| "loss": 0.0609, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.3614573346116972, |
| "grad_norm": 0.7500850558280945, |
| "learning_rate": 9.983350934579046e-05, |
| "loss": 0.0742, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.3710450623202302, |
| "grad_norm": 0.6990365386009216, |
| "learning_rate": 9.982633799666146e-05, |
| "loss": 0.0605, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.3806327900287632, |
| "grad_norm": 0.5741100311279297, |
| "learning_rate": 9.981901570758554e-05, |
| "loss": 0.0639, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.3902205177372964, |
| "grad_norm": 0.6131389141082764, |
| "learning_rate": 9.981154250074422e-05, |
| "loss": 0.0695, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.3998082454458294, |
| "grad_norm": 0.6654881834983826, |
| "learning_rate": 9.980391839877628e-05, |
| "loss": 0.0755, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.4093959731543624, |
| "grad_norm": 0.5249256491661072, |
| "learning_rate": 9.979614342477753e-05, |
| "loss": 0.0613, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.4189837008628956, |
| "grad_norm": 0.5373178124427795, |
| "learning_rate": 9.978821760230086e-05, |
| "loss": 0.072, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.4792821407318115, |
| "learning_rate": 9.978014095535615e-05, |
| "loss": 0.0549, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.4381591562799616, |
| "grad_norm": 0.5644699931144714, |
| "learning_rate": 9.977191350841016e-05, |
| "loss": 0.065, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.4477468839884948, |
| "grad_norm": 0.374956339597702, |
| "learning_rate": 9.976353528638642e-05, |
| "loss": 0.0545, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.4573346116970278, |
| "grad_norm": 0.4185064733028412, |
| "learning_rate": 9.975500631466527e-05, |
| "loss": 0.0619, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.4669223394055608, |
| "grad_norm": 0.3903638422489166, |
| "learning_rate": 9.974632661908372e-05, |
| "loss": 0.0526, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.476510067114094, |
| "grad_norm": 0.45104435086250305, |
| "learning_rate": 9.973749622593534e-05, |
| "loss": 0.061, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.486097794822627, |
| "grad_norm": 0.4152944087982178, |
| "learning_rate": 9.972851516197019e-05, |
| "loss": 0.0635, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.49568552253116, |
| "grad_norm": 0.5824716091156006, |
| "learning_rate": 9.971938345439484e-05, |
| "loss": 0.0598, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.5052732502396933, |
| "grad_norm": 0.5598675608634949, |
| "learning_rate": 9.971010113087212e-05, |
| "loss": 0.0529, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.5148609779482263, |
| "grad_norm": 0.6759763956069946, |
| "learning_rate": 9.970066821952118e-05, |
| "loss": 0.0687, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.5244487056567593, |
| "grad_norm": 0.4682703912258148, |
| "learning_rate": 9.969108474891732e-05, |
| "loss": 0.0557, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.5340364333652925, |
| "grad_norm": 0.6091550588607788, |
| "learning_rate": 9.968135074809194e-05, |
| "loss": 0.0628, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.5436241610738255, |
| "grad_norm": 0.5167152881622314, |
| "learning_rate": 9.96714662465325e-05, |
| "loss": 0.056, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.5532118887823585, |
| "grad_norm": 0.5612486004829407, |
| "learning_rate": 9.966143127418225e-05, |
| "loss": 0.0565, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.5627996164908917, |
| "grad_norm": 0.3620167672634125, |
| "learning_rate": 9.965124586144039e-05, |
| "loss": 0.0533, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.5723873441994247, |
| "grad_norm": 0.6704486012458801, |
| "learning_rate": 9.964091003916179e-05, |
| "loss": 0.0633, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.5819750719079577, |
| "grad_norm": 0.6531718969345093, |
| "learning_rate": 9.963042383865694e-05, |
| "loss": 0.0665, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.591562799616491, |
| "grad_norm": 0.5249754786491394, |
| "learning_rate": 9.961978729169192e-05, |
| "loss": 0.0471, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.601150527325024, |
| "grad_norm": 0.4377578794956207, |
| "learning_rate": 9.960900043048826e-05, |
| "loss": 0.0561, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.610738255033557, |
| "grad_norm": 0.34821832180023193, |
| "learning_rate": 9.959806328772279e-05, |
| "loss": 0.0575, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.6203259827420902, |
| "grad_norm": 0.41964197158813477, |
| "learning_rate": 9.958697589652763e-05, |
| "loss": 0.0552, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.6299137104506232, |
| "grad_norm": 0.5038737058639526, |
| "learning_rate": 9.957573829049004e-05, |
| "loss": 0.0571, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.6395014381591562, |
| "grad_norm": 0.5568312406539917, |
| "learning_rate": 9.956435050365233e-05, |
| "loss": 0.0535, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.6490891658676894, |
| "grad_norm": 0.3089469075202942, |
| "learning_rate": 9.955281257051178e-05, |
| "loss": 0.0567, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.6586768935762224, |
| "grad_norm": 0.5025231838226318, |
| "learning_rate": 9.954112452602045e-05, |
| "loss": 0.0595, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.6682646212847554, |
| "grad_norm": 0.6473100185394287, |
| "learning_rate": 9.952928640558519e-05, |
| "loss": 0.0583, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.6778523489932886, |
| "grad_norm": 0.38910412788391113, |
| "learning_rate": 9.951729824506745e-05, |
| "loss": 0.0606, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.6874400767018218, |
| "grad_norm": 0.5367538332939148, |
| "learning_rate": 9.950516008078325e-05, |
| "loss": 0.0658, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.6970278044103546, |
| "grad_norm": 0.5526398420333862, |
| "learning_rate": 9.949287194950293e-05, |
| "loss": 0.0554, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.7066155321188878, |
| "grad_norm": 0.5616441369056702, |
| "learning_rate": 9.948043388845121e-05, |
| "loss": 0.0579, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.716203259827421, |
| "grad_norm": 0.41163280606269836, |
| "learning_rate": 9.946784593530694e-05, |
| "loss": 0.0612, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.7257909875359538, |
| "grad_norm": 0.45861759781837463, |
| "learning_rate": 9.945510812820308e-05, |
| "loss": 0.0524, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.735378715244487, |
| "grad_norm": 0.4847518503665924, |
| "learning_rate": 9.944222050572653e-05, |
| "loss": 0.0545, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.7449664429530203, |
| "grad_norm": 0.36065423488616943, |
| "learning_rate": 9.942918310691803e-05, |
| "loss": 0.0503, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.754554170661553, |
| "grad_norm": 0.5361629128456116, |
| "learning_rate": 9.941599597127202e-05, |
| "loss": 0.0582, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.7641418983700863, |
| "grad_norm": 0.290815532207489, |
| "learning_rate": 9.940265913873657e-05, |
| "loss": 0.0626, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.7737296260786195, |
| "grad_norm": 0.3743116855621338, |
| "learning_rate": 9.938917264971324e-05, |
| "loss": 0.0577, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.7833173537871523, |
| "grad_norm": 0.7040207982063293, |
| "learning_rate": 9.937553654505691e-05, |
| "loss": 0.0625, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.7929050814956855, |
| "grad_norm": 0.4356692135334015, |
| "learning_rate": 9.936175086607572e-05, |
| "loss": 0.0616, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.8024928092042187, |
| "grad_norm": 0.3443772494792938, |
| "learning_rate": 9.934781565453089e-05, |
| "loss": 0.0573, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.8120805369127517, |
| "grad_norm": 0.4956841766834259, |
| "learning_rate": 9.933373095263667e-05, |
| "loss": 0.0528, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.8216682646212847, |
| "grad_norm": 0.5193634629249573, |
| "learning_rate": 9.931949680306012e-05, |
| "loss": 0.0548, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.831255992329818, |
| "grad_norm": 0.3799174129962921, |
| "learning_rate": 9.930511324892104e-05, |
| "loss": 0.0563, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.840843720038351, |
| "grad_norm": 0.3923283815383911, |
| "learning_rate": 9.929058033379181e-05, |
| "loss": 0.0595, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.850431447746884, |
| "grad_norm": 0.47552716732025146, |
| "learning_rate": 9.927589810169733e-05, |
| "loss": 0.0546, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.8600191754554172, |
| "grad_norm": 0.4305611848831177, |
| "learning_rate": 9.926106659711476e-05, |
| "loss": 0.0523, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.8696069031639502, |
| "grad_norm": 0.5576485395431519, |
| "learning_rate": 9.924608586497348e-05, |
| "loss": 0.0574, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.8791946308724832, |
| "grad_norm": 0.31708958745002747, |
| "learning_rate": 9.923095595065494e-05, |
| "loss": 0.0482, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.8887823585810164, |
| "grad_norm": 0.41617056727409363, |
| "learning_rate": 9.921567689999247e-05, |
| "loss": 0.0584, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.8983700862895494, |
| "grad_norm": 0.5047758221626282, |
| "learning_rate": 9.920024875927125e-05, |
| "loss": 0.0642, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.9079578139980824, |
| "grad_norm": 0.4173164367675781, |
| "learning_rate": 9.918467157522805e-05, |
| "loss": 0.0548, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.9175455417066156, |
| "grad_norm": 0.4640159010887146, |
| "learning_rate": 9.916894539505115e-05, |
| "loss": 0.0499, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.9271332694151486, |
| "grad_norm": 0.41713109612464905, |
| "learning_rate": 9.915307026638018e-05, |
| "loss": 0.0491, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.9367209971236816, |
| "grad_norm": 0.392994225025177, |
| "learning_rate": 9.9137046237306e-05, |
| "loss": 0.0522, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.9463087248322148, |
| "grad_norm": 0.32308030128479004, |
| "learning_rate": 9.912087335637054e-05, |
| "loss": 0.0557, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.9558964525407478, |
| "grad_norm": 0.406943678855896, |
| "learning_rate": 9.910455167256663e-05, |
| "loss": 0.0523, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.9654841802492808, |
| "grad_norm": 0.3809382915496826, |
| "learning_rate": 9.908808123533787e-05, |
| "loss": 0.0567, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.975071907957814, |
| "grad_norm": 0.3431997299194336, |
| "learning_rate": 9.907146209457852e-05, |
| "loss": 0.0456, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.984659635666347, |
| "grad_norm": 0.37939101457595825, |
| "learning_rate": 9.905469430063325e-05, |
| "loss": 0.0479, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.99424736337488, |
| "grad_norm": 0.492702841758728, |
| "learning_rate": 9.903777790429714e-05, |
| "loss": 0.048, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.0038350910834133, |
| "grad_norm": 0.41130146384239197, |
| "learning_rate": 9.90207129568153e-05, |
| "loss": 0.0545, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.0134228187919465, |
| "grad_norm": 0.5280721187591553, |
| "learning_rate": 9.900349950988297e-05, |
| "loss": 0.0516, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.0230105465004793, |
| "grad_norm": 0.3090174198150635, |
| "learning_rate": 9.89861376156452e-05, |
| "loss": 0.043, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.0325982742090125, |
| "grad_norm": 0.35579144954681396, |
| "learning_rate": 9.896862732669671e-05, |
| "loss": 0.0584, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.0421860019175457, |
| "grad_norm": 0.44842928647994995, |
| "learning_rate": 9.89509686960818e-05, |
| "loss": 0.0523, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.0517737296260785, |
| "grad_norm": 0.4050745666027069, |
| "learning_rate": 9.893316177729411e-05, |
| "loss": 0.0529, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.0613614573346117, |
| "grad_norm": 0.2710857093334198, |
| "learning_rate": 9.891520662427651e-05, |
| "loss": 0.0582, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.070949185043145, |
| "grad_norm": 0.327932745218277, |
| "learning_rate": 9.88971032914209e-05, |
| "loss": 0.056, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.0805369127516777, |
| "grad_norm": 0.41889169812202454, |
| "learning_rate": 9.887885183356809e-05, |
| "loss": 0.0449, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.090124640460211, |
| "grad_norm": 0.37824153900146484, |
| "learning_rate": 9.886045230600757e-05, |
| "loss": 0.0478, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.099712368168744, |
| "grad_norm": 0.4298747479915619, |
| "learning_rate": 9.884190476447746e-05, |
| "loss": 0.0479, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.109300095877277, |
| "grad_norm": 0.5047415494918823, |
| "learning_rate": 9.882320926516416e-05, |
| "loss": 0.0509, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.11888782358581, |
| "grad_norm": 0.3802444338798523, |
| "learning_rate": 9.880436586470234e-05, |
| "loss": 0.0469, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.1284755512943434, |
| "grad_norm": 0.3608779311180115, |
| "learning_rate": 9.87853746201747e-05, |
| "loss": 0.0499, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.138063279002876, |
| "grad_norm": 0.49108660221099854, |
| "learning_rate": 9.876623558911181e-05, |
| "loss": 0.0494, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.1476510067114094, |
| "grad_norm": 0.35984379053115845, |
| "learning_rate": 9.874694882949194e-05, |
| "loss": 0.0513, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.1572387344199426, |
| "grad_norm": 0.6457746624946594, |
| "learning_rate": 9.872751439974084e-05, |
| "loss": 0.0497, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.1668264621284754, |
| "grad_norm": 0.4572752118110657, |
| "learning_rate": 9.870793235873164e-05, |
| "loss": 0.0497, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.1764141898370086, |
| "grad_norm": 0.5329883098602295, |
| "learning_rate": 9.868820276578463e-05, |
| "loss": 0.0597, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.186001917545542, |
| "grad_norm": 0.4147273302078247, |
| "learning_rate": 9.866832568066706e-05, |
| "loss": 0.0537, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.1955896452540746, |
| "grad_norm": 0.3269449770450592, |
| "learning_rate": 9.864830116359299e-05, |
| "loss": 0.0541, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.205177372962608, |
| "grad_norm": 0.38033929467201233, |
| "learning_rate": 9.862812927522309e-05, |
| "loss": 0.0493, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.214765100671141, |
| "grad_norm": 0.39863190054893494, |
| "learning_rate": 9.86078100766645e-05, |
| "loss": 0.0582, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.224352828379674, |
| "grad_norm": 0.3785865604877472, |
| "learning_rate": 9.858734362947056e-05, |
| "loss": 0.0451, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.233940556088207, |
| "grad_norm": 0.3535449802875519, |
| "learning_rate": 9.856672999564072e-05, |
| "loss": 0.0569, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.2435282837967403, |
| "grad_norm": 0.43401646614074707, |
| "learning_rate": 9.854596923762026e-05, |
| "loss": 0.0451, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.253116011505273, |
| "grad_norm": 0.3438590466976166, |
| "learning_rate": 9.852506141830018e-05, |
| "loss": 0.0527, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.2627037392138063, |
| "grad_norm": 0.524154543876648, |
| "learning_rate": 9.850400660101698e-05, |
| "loss": 0.0536, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.2722914669223395, |
| "grad_norm": 0.6278344392776489, |
| "learning_rate": 9.848280484955243e-05, |
| "loss": 0.0566, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.2818791946308723, |
| "grad_norm": 0.45389410853385925, |
| "learning_rate": 9.846145622813343e-05, |
| "loss": 0.0538, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.2914669223394055, |
| "grad_norm": 0.3653407692909241, |
| "learning_rate": 9.843996080143181e-05, |
| "loss": 0.0496, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.3010546500479387, |
| "grad_norm": 0.39420798420906067, |
| "learning_rate": 9.84183186345641e-05, |
| "loss": 0.0507, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.310642377756472, |
| "grad_norm": 0.36511731147766113, |
| "learning_rate": 9.839652979309135e-05, |
| "loss": 0.0415, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.3202301054650047, |
| "grad_norm": 0.6739844679832458, |
| "learning_rate": 9.837459434301896e-05, |
| "loss": 0.0497, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.329817833173538, |
| "grad_norm": 0.3520050346851349, |
| "learning_rate": 9.835251235079643e-05, |
| "loss": 0.0476, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.3394055608820707, |
| "grad_norm": 0.3880830705165863, |
| "learning_rate": 9.833028388331719e-05, |
| "loss": 0.0477, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.348993288590604, |
| "grad_norm": 0.5605785250663757, |
| "learning_rate": 9.830790900791842e-05, |
| "loss": 0.0565, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.358581016299137, |
| "grad_norm": 0.43835964798927307, |
| "learning_rate": 9.828538779238074e-05, |
| "loss": 0.0481, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.3681687440076704, |
| "grad_norm": 0.46309876441955566, |
| "learning_rate": 9.826272030492817e-05, |
| "loss": 0.0459, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.377756471716203, |
| "grad_norm": 0.315773606300354, |
| "learning_rate": 9.823990661422778e-05, |
| "loss": 0.0446, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.3873441994247364, |
| "grad_norm": 0.37291958928108215, |
| "learning_rate": 9.821694678938953e-05, |
| "loss": 0.0394, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.396931927133269, |
| "grad_norm": 0.5233327150344849, |
| "learning_rate": 9.819384089996613e-05, |
| "loss": 0.0494, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.4065196548418024, |
| "grad_norm": 0.33032602071762085, |
| "learning_rate": 9.817058901595269e-05, |
| "loss": 0.0586, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.4161073825503356, |
| "grad_norm": 0.39209842681884766, |
| "learning_rate": 9.814719120778663e-05, |
| "loss": 0.0528, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.425695110258869, |
| "grad_norm": 0.3824262320995331, |
| "learning_rate": 9.81236475463474e-05, |
| "loss": 0.0502, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.4352828379674016, |
| "grad_norm": 0.4724734127521515, |
| "learning_rate": 9.809995810295633e-05, |
| "loss": 0.0538, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.444870565675935, |
| "grad_norm": 0.4816121459007263, |
| "learning_rate": 9.80761229493763e-05, |
| "loss": 0.0599, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.4544582933844676, |
| "grad_norm": 0.4902478754520416, |
| "learning_rate": 9.805214215781165e-05, |
| "loss": 0.0579, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.464046021093001, |
| "grad_norm": 0.4263833463191986, |
| "learning_rate": 9.802801580090785e-05, |
| "loss": 0.0496, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.473633748801534, |
| "grad_norm": 0.4122842848300934, |
| "learning_rate": 9.800374395175143e-05, |
| "loss": 0.0601, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.4832214765100673, |
| "grad_norm": 0.3193143308162689, |
| "learning_rate": 9.797932668386955e-05, |
| "loss": 0.0453, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.4928092042186, |
| "grad_norm": 0.302079439163208, |
| "learning_rate": 9.795476407122994e-05, |
| "loss": 0.0526, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.5023969319271333, |
| "grad_norm": 0.3169849216938019, |
| "learning_rate": 9.793005618824066e-05, |
| "loss": 0.0475, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.511984659635666, |
| "grad_norm": 0.35016322135925293, |
| "learning_rate": 9.790520310974978e-05, |
| "loss": 0.0523, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.5215723873441993, |
| "grad_norm": 0.5532832741737366, |
| "learning_rate": 9.788020491104524e-05, |
| "loss": 0.0516, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.5311601150527325, |
| "grad_norm": 0.48316141963005066, |
| "learning_rate": 9.785506166785461e-05, |
| "loss": 0.0455, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.5407478427612658, |
| "grad_norm": 0.53989177942276, |
| "learning_rate": 9.78297734563448e-05, |
| "loss": 0.05, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.5503355704697985, |
| "grad_norm": 0.44286760687828064, |
| "learning_rate": 9.780434035312196e-05, |
| "loss": 0.0552, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.5599232981783318, |
| "grad_norm": 0.5638286471366882, |
| "learning_rate": 9.777876243523108e-05, |
| "loss": 0.062, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.569511025886865, |
| "grad_norm": 0.45765963196754456, |
| "learning_rate": 9.775303978015585e-05, |
| "loss": 0.0535, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.5790987535953978, |
| "grad_norm": 0.3893742859363556, |
| "learning_rate": 9.772717246581848e-05, |
| "loss": 0.055, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.588686481303931, |
| "grad_norm": 0.4707334637641907, |
| "learning_rate": 9.770116057057933e-05, |
| "loss": 0.055, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.598274209012464, |
| "grad_norm": 0.4900120198726654, |
| "learning_rate": 9.767500417323676e-05, |
| "loss": 0.056, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.607861936720997, |
| "grad_norm": 0.3331255316734314, |
| "learning_rate": 9.764870335302689e-05, |
| "loss": 0.0502, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.61744966442953, |
| "grad_norm": 0.47928670048713684, |
| "learning_rate": 9.762225818962336e-05, |
| "loss": 0.0514, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.6270373921380634, |
| "grad_norm": 0.3848089873790741, |
| "learning_rate": 9.759566876313701e-05, |
| "loss": 0.044, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.636625119846596, |
| "grad_norm": 0.4957471787929535, |
| "learning_rate": 9.756893515411574e-05, |
| "loss": 0.0434, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.6462128475551294, |
| "grad_norm": 0.5820662975311279, |
| "learning_rate": 9.754205744354423e-05, |
| "loss": 0.0484, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.6558005752636626, |
| "grad_norm": 0.3916762173175812, |
| "learning_rate": 9.751503571284368e-05, |
| "loss": 0.0488, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.665388302972196, |
| "grad_norm": 0.30791330337524414, |
| "learning_rate": 9.748787004387157e-05, |
| "loss": 0.0513, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.6749760306807286, |
| "grad_norm": 0.5171549320220947, |
| "learning_rate": 9.74605605189214e-05, |
| "loss": 0.0516, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.684563758389262, |
| "grad_norm": 0.47496703267097473, |
| "learning_rate": 9.743310722072251e-05, |
| "loss": 0.0493, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.6941514860977946, |
| "grad_norm": 0.5075270533561707, |
| "learning_rate": 9.74055102324397e-05, |
| "loss": 0.0489, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.703739213806328, |
| "grad_norm": 0.4490506052970886, |
| "learning_rate": 9.737776963767313e-05, |
| "loss": 0.0576, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.713326941514861, |
| "grad_norm": 0.3923519551753998, |
| "learning_rate": 9.734988552045792e-05, |
| "loss": 0.0513, |
| "step": 2830 |
| }, |
| { |
| "epoch": 2.7229146692233943, |
| "grad_norm": 0.2816771864891052, |
| "learning_rate": 9.7321857965264e-05, |
| "loss": 0.0578, |
| "step": 2840 |
| }, |
| { |
| "epoch": 2.732502396931927, |
| "grad_norm": 0.6326708793640137, |
| "learning_rate": 9.729368705699587e-05, |
| "loss": 0.0452, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.7420901246404603, |
| "grad_norm": 0.3657870292663574, |
| "learning_rate": 9.726537288099215e-05, |
| "loss": 0.0524, |
| "step": 2860 |
| }, |
| { |
| "epoch": 2.751677852348993, |
| "grad_norm": 0.3347817063331604, |
| "learning_rate": 9.723691552302562e-05, |
| "loss": 0.0451, |
| "step": 2870 |
| }, |
| { |
| "epoch": 2.7612655800575263, |
| "grad_norm": 0.4541146457195282, |
| "learning_rate": 9.720831506930274e-05, |
| "loss": 0.0487, |
| "step": 2880 |
| }, |
| { |
| "epoch": 2.7708533077660595, |
| "grad_norm": 0.4089963734149933, |
| "learning_rate": 9.71795716064634e-05, |
| "loss": 0.0479, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.7804410354745928, |
| "grad_norm": 0.3474633991718292, |
| "learning_rate": 9.715068522158081e-05, |
| "loss": 0.0467, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.7900287631831255, |
| "grad_norm": 0.49998903274536133, |
| "learning_rate": 9.712165600216107e-05, |
| "loss": 0.0579, |
| "step": 2910 |
| }, |
| { |
| "epoch": 2.7996164908916588, |
| "grad_norm": 0.41667240858078003, |
| "learning_rate": 9.709248403614298e-05, |
| "loss": 0.0456, |
| "step": 2920 |
| }, |
| { |
| "epoch": 2.8092042186001915, |
| "grad_norm": 0.3876051604747772, |
| "learning_rate": 9.706316941189779e-05, |
| "loss": 0.0411, |
| "step": 2930 |
| }, |
| { |
| "epoch": 2.8187919463087248, |
| "grad_norm": 0.34348323941230774, |
| "learning_rate": 9.703371221822888e-05, |
| "loss": 0.0463, |
| "step": 2940 |
| }, |
| { |
| "epoch": 2.828379674017258, |
| "grad_norm": 0.5338907241821289, |
| "learning_rate": 9.700411254437154e-05, |
| "loss": 0.0476, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.837967401725791, |
| "grad_norm": 0.5973591804504395, |
| "learning_rate": 9.697437047999266e-05, |
| "loss": 0.0531, |
| "step": 2960 |
| }, |
| { |
| "epoch": 2.847555129434324, |
| "grad_norm": 0.31144216656684875, |
| "learning_rate": 9.694448611519049e-05, |
| "loss": 0.0494, |
| "step": 2970 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.4310339391231537, |
| "learning_rate": 9.691445954049434e-05, |
| "loss": 0.0448, |
| "step": 2980 |
| }, |
| { |
| "epoch": 2.86673058485139, |
| "grad_norm": 0.36877721548080444, |
| "learning_rate": 9.688429084686435e-05, |
| "loss": 0.043, |
| "step": 2990 |
| }, |
| { |
| "epoch": 2.876318312559923, |
| "grad_norm": 0.35387906432151794, |
| "learning_rate": 9.685398012569115e-05, |
| "loss": 0.055, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.8859060402684564, |
| "grad_norm": 0.3781449496746063, |
| "learning_rate": 9.682352746879562e-05, |
| "loss": 0.0513, |
| "step": 3010 |
| }, |
| { |
| "epoch": 2.8954937679769897, |
| "grad_norm": 0.3556309938430786, |
| "learning_rate": 9.679293296842863e-05, |
| "loss": 0.0556, |
| "step": 3020 |
| }, |
| { |
| "epoch": 2.9050814956855224, |
| "grad_norm": 0.4965471923351288, |
| "learning_rate": 9.676219671727072e-05, |
| "loss": 0.0502, |
| "step": 3030 |
| }, |
| { |
| "epoch": 2.9146692233940557, |
| "grad_norm": 0.40289080142974854, |
| "learning_rate": 9.673131880843185e-05, |
| "loss": 0.0474, |
| "step": 3040 |
| }, |
| { |
| "epoch": 2.9242569511025884, |
| "grad_norm": 0.3517281115055084, |
| "learning_rate": 9.67002993354511e-05, |
| "loss": 0.0557, |
| "step": 3050 |
| }, |
| { |
| "epoch": 2.9338446788111217, |
| "grad_norm": 0.5005010366439819, |
| "learning_rate": 9.66691383922964e-05, |
| "loss": 0.059, |
| "step": 3060 |
| }, |
| { |
| "epoch": 2.943432406519655, |
| "grad_norm": 0.36781349778175354, |
| "learning_rate": 9.66378360733642e-05, |
| "loss": 0.055, |
| "step": 3070 |
| }, |
| { |
| "epoch": 2.953020134228188, |
| "grad_norm": 0.310249388217926, |
| "learning_rate": 9.660639247347931e-05, |
| "loss": 0.0523, |
| "step": 3080 |
| }, |
| { |
| "epoch": 2.962607861936721, |
| "grad_norm": 0.27061378955841064, |
| "learning_rate": 9.657480768789446e-05, |
| "loss": 0.0505, |
| "step": 3090 |
| }, |
| { |
| "epoch": 2.972195589645254, |
| "grad_norm": 0.34516626596450806, |
| "learning_rate": 9.654308181229006e-05, |
| "loss": 0.0489, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.981783317353787, |
| "grad_norm": 0.3140753209590912, |
| "learning_rate": 9.651121494277396e-05, |
| "loss": 0.0531, |
| "step": 3110 |
| }, |
| { |
| "epoch": 2.99137104506232, |
| "grad_norm": 0.4165388345718384, |
| "learning_rate": 9.647920717588114e-05, |
| "loss": 0.0571, |
| "step": 3120 |
| }, |
| { |
| "epoch": 3.0009587727708533, |
| "grad_norm": 0.36014652252197266, |
| "learning_rate": 9.644705860857339e-05, |
| "loss": 0.0515, |
| "step": 3130 |
| }, |
| { |
| "epoch": 3.0105465004793865, |
| "grad_norm": 0.4353986382484436, |
| "learning_rate": 9.641476933823899e-05, |
| "loss": 0.0488, |
| "step": 3140 |
| }, |
| { |
| "epoch": 3.0201342281879193, |
| "grad_norm": 0.4083373546600342, |
| "learning_rate": 9.638233946269253e-05, |
| "loss": 0.052, |
| "step": 3150 |
| }, |
| { |
| "epoch": 3.0297219558964525, |
| "grad_norm": 0.3805656135082245, |
| "learning_rate": 9.634976908017446e-05, |
| "loss": 0.0461, |
| "step": 3160 |
| }, |
| { |
| "epoch": 3.0393096836049858, |
| "grad_norm": 0.36862942576408386, |
| "learning_rate": 9.631705828935092e-05, |
| "loss": 0.0526, |
| "step": 3170 |
| }, |
| { |
| "epoch": 3.0488974113135185, |
| "grad_norm": 0.4625187814235687, |
| "learning_rate": 9.628420718931338e-05, |
| "loss": 0.0536, |
| "step": 3180 |
| }, |
| { |
| "epoch": 3.0584851390220518, |
| "grad_norm": 0.2972494959831238, |
| "learning_rate": 9.625121587957834e-05, |
| "loss": 0.0468, |
| "step": 3190 |
| }, |
| { |
| "epoch": 3.068072866730585, |
| "grad_norm": 0.5064423084259033, |
| "learning_rate": 9.621808446008708e-05, |
| "loss": 0.0516, |
| "step": 3200 |
| }, |
| { |
| "epoch": 3.0776605944391178, |
| "grad_norm": 0.28751927614212036, |
| "learning_rate": 9.618481303120528e-05, |
| "loss": 0.0463, |
| "step": 3210 |
| }, |
| { |
| "epoch": 3.087248322147651, |
| "grad_norm": 0.4198159873485565, |
| "learning_rate": 9.615140169372274e-05, |
| "loss": 0.0395, |
| "step": 3220 |
| }, |
| { |
| "epoch": 3.096836049856184, |
| "grad_norm": 0.41463902592658997, |
| "learning_rate": 9.611785054885312e-05, |
| "loss": 0.0501, |
| "step": 3230 |
| }, |
| { |
| "epoch": 3.106423777564717, |
| "grad_norm": 0.37878739833831787, |
| "learning_rate": 9.608415969823361e-05, |
| "loss": 0.0484, |
| "step": 3240 |
| }, |
| { |
| "epoch": 3.11601150527325, |
| "grad_norm": 0.4990726113319397, |
| "learning_rate": 9.605032924392457e-05, |
| "loss": 0.049, |
| "step": 3250 |
| }, |
| { |
| "epoch": 3.1255992329817834, |
| "grad_norm": 0.39530688524246216, |
| "learning_rate": 9.601635928840927e-05, |
| "loss": 0.0658, |
| "step": 3260 |
| }, |
| { |
| "epoch": 3.135186960690316, |
| "grad_norm": 0.5206883549690247, |
| "learning_rate": 9.598224993459364e-05, |
| "loss": 0.0538, |
| "step": 3270 |
| }, |
| { |
| "epoch": 3.1447746883988494, |
| "grad_norm": 0.5972046256065369, |
| "learning_rate": 9.594800128580582e-05, |
| "loss": 0.054, |
| "step": 3280 |
| }, |
| { |
| "epoch": 3.1543624161073827, |
| "grad_norm": 0.33001407980918884, |
| "learning_rate": 9.591361344579595e-05, |
| "loss": 0.0544, |
| "step": 3290 |
| }, |
| { |
| "epoch": 3.1639501438159154, |
| "grad_norm": 0.38547295331954956, |
| "learning_rate": 9.58790865187358e-05, |
| "loss": 0.0422, |
| "step": 3300 |
| }, |
| { |
| "epoch": 3.1735378715244487, |
| "grad_norm": 0.3369503915309906, |
| "learning_rate": 9.584442060921851e-05, |
| "loss": 0.0472, |
| "step": 3310 |
| }, |
| { |
| "epoch": 3.183125599232982, |
| "grad_norm": 0.2815903127193451, |
| "learning_rate": 9.580961582225826e-05, |
| "loss": 0.0463, |
| "step": 3320 |
| }, |
| { |
| "epoch": 3.1927133269415147, |
| "grad_norm": 0.42745402455329895, |
| "learning_rate": 9.577467226328987e-05, |
| "loss": 0.0517, |
| "step": 3330 |
| }, |
| { |
| "epoch": 3.202301054650048, |
| "grad_norm": 0.46006882190704346, |
| "learning_rate": 9.573959003816856e-05, |
| "loss": 0.0494, |
| "step": 3340 |
| }, |
| { |
| "epoch": 3.211888782358581, |
| "grad_norm": 0.47103896737098694, |
| "learning_rate": 9.57043692531697e-05, |
| "loss": 0.0511, |
| "step": 3350 |
| }, |
| { |
| "epoch": 3.221476510067114, |
| "grad_norm": 0.41211676597595215, |
| "learning_rate": 9.566901001498826e-05, |
| "loss": 0.0512, |
| "step": 3360 |
| }, |
| { |
| "epoch": 3.231064237775647, |
| "grad_norm": 0.5582764148712158, |
| "learning_rate": 9.563351243073878e-05, |
| "loss": 0.0584, |
| "step": 3370 |
| }, |
| { |
| "epoch": 3.2406519654841803, |
| "grad_norm": 0.3129172921180725, |
| "learning_rate": 9.559787660795474e-05, |
| "loss": 0.0596, |
| "step": 3380 |
| }, |
| { |
| "epoch": 3.2502396931927136, |
| "grad_norm": 0.4259207844734192, |
| "learning_rate": 9.556210265458854e-05, |
| "loss": 0.0507, |
| "step": 3390 |
| }, |
| { |
| "epoch": 3.2598274209012463, |
| "grad_norm": 0.29509371519088745, |
| "learning_rate": 9.552619067901089e-05, |
| "loss": 0.0519, |
| "step": 3400 |
| }, |
| { |
| "epoch": 3.2694151486097796, |
| "grad_norm": 0.33097851276397705, |
| "learning_rate": 9.549014079001074e-05, |
| "loss": 0.0503, |
| "step": 3410 |
| }, |
| { |
| "epoch": 3.2790028763183123, |
| "grad_norm": 0.6283732056617737, |
| "learning_rate": 9.545395309679469e-05, |
| "loss": 0.052, |
| "step": 3420 |
| }, |
| { |
| "epoch": 3.2885906040268456, |
| "grad_norm": 0.29192429780960083, |
| "learning_rate": 9.54176277089869e-05, |
| "loss": 0.0452, |
| "step": 3430 |
| }, |
| { |
| "epoch": 3.2981783317353788, |
| "grad_norm": 0.3860151767730713, |
| "learning_rate": 9.538116473662861e-05, |
| "loss": 0.0536, |
| "step": 3440 |
| }, |
| { |
| "epoch": 3.307766059443912, |
| "grad_norm": 0.5127553343772888, |
| "learning_rate": 9.534456429017784e-05, |
| "loss": 0.0521, |
| "step": 3450 |
| }, |
| { |
| "epoch": 3.3173537871524448, |
| "grad_norm": 0.4540964961051941, |
| "learning_rate": 9.530782648050907e-05, |
| "loss": 0.0552, |
| "step": 3460 |
| }, |
| { |
| "epoch": 3.326941514860978, |
| "grad_norm": 0.34647271037101746, |
| "learning_rate": 9.52709514189129e-05, |
| "loss": 0.0457, |
| "step": 3470 |
| }, |
| { |
| "epoch": 3.336529242569511, |
| "grad_norm": 0.4515313506126404, |
| "learning_rate": 9.523393921709574e-05, |
| "loss": 0.0467, |
| "step": 3480 |
| }, |
| { |
| "epoch": 3.346116970278044, |
| "grad_norm": 0.3084343373775482, |
| "learning_rate": 9.519678998717935e-05, |
| "loss": 0.0462, |
| "step": 3490 |
| }, |
| { |
| "epoch": 3.3557046979865772, |
| "grad_norm": 0.5871327519416809, |
| "learning_rate": 9.515950384170073e-05, |
| "loss": 0.0566, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.3652924256951104, |
| "grad_norm": 0.4407544732093811, |
| "learning_rate": 9.51220808936115e-05, |
| "loss": 0.0436, |
| "step": 3510 |
| }, |
| { |
| "epoch": 3.3748801534036432, |
| "grad_norm": 0.3434475362300873, |
| "learning_rate": 9.508452125627779e-05, |
| "loss": 0.0483, |
| "step": 3520 |
| }, |
| { |
| "epoch": 3.3844678811121764, |
| "grad_norm": 0.5896394848823547, |
| "learning_rate": 9.504682504347978e-05, |
| "loss": 0.0435, |
| "step": 3530 |
| }, |
| { |
| "epoch": 3.3940556088207097, |
| "grad_norm": 0.380214661359787, |
| "learning_rate": 9.500899236941139e-05, |
| "loss": 0.053, |
| "step": 3540 |
| }, |
| { |
| "epoch": 3.4036433365292424, |
| "grad_norm": 0.2878900170326233, |
| "learning_rate": 9.497102334867989e-05, |
| "loss": 0.0488, |
| "step": 3550 |
| }, |
| { |
| "epoch": 3.4132310642377757, |
| "grad_norm": 0.6185137629508972, |
| "learning_rate": 9.493291809630562e-05, |
| "loss": 0.0512, |
| "step": 3560 |
| }, |
| { |
| "epoch": 3.422818791946309, |
| "grad_norm": 0.5001134872436523, |
| "learning_rate": 9.489467672772162e-05, |
| "loss": 0.055, |
| "step": 3570 |
| }, |
| { |
| "epoch": 3.4324065196548417, |
| "grad_norm": 0.46808385848999023, |
| "learning_rate": 9.485629935877323e-05, |
| "loss": 0.0524, |
| "step": 3580 |
| }, |
| { |
| "epoch": 3.441994247363375, |
| "grad_norm": 0.4512917399406433, |
| "learning_rate": 9.481778610571782e-05, |
| "loss": 0.0487, |
| "step": 3590 |
| }, |
| { |
| "epoch": 3.451581975071908, |
| "grad_norm": 0.39726588129997253, |
| "learning_rate": 9.477913708522435e-05, |
| "loss": 0.0578, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.461169702780441, |
| "grad_norm": 0.32351112365722656, |
| "learning_rate": 9.474035241437312e-05, |
| "loss": 0.0488, |
| "step": 3610 |
| }, |
| { |
| "epoch": 3.470757430488974, |
| "grad_norm": 0.47034138441085815, |
| "learning_rate": 9.470143221065531e-05, |
| "loss": 0.0618, |
| "step": 3620 |
| }, |
| { |
| "epoch": 3.4803451581975073, |
| "grad_norm": 0.23497724533081055, |
| "learning_rate": 9.46623765919727e-05, |
| "loss": 0.0499, |
| "step": 3630 |
| }, |
| { |
| "epoch": 3.48993288590604, |
| "grad_norm": 0.25630268454551697, |
| "learning_rate": 9.462318567663728e-05, |
| "loss": 0.0508, |
| "step": 3640 |
| }, |
| { |
| "epoch": 3.4995206136145733, |
| "grad_norm": 0.3957800269126892, |
| "learning_rate": 9.458385958337087e-05, |
| "loss": 0.0554, |
| "step": 3650 |
| }, |
| { |
| "epoch": 3.5091083413231066, |
| "grad_norm": 0.25262129306793213, |
| "learning_rate": 9.454439843130483e-05, |
| "loss": 0.0473, |
| "step": 3660 |
| }, |
| { |
| "epoch": 3.5186960690316393, |
| "grad_norm": 0.3933389186859131, |
| "learning_rate": 9.450480233997963e-05, |
| "loss": 0.0471, |
| "step": 3670 |
| }, |
| { |
| "epoch": 3.5282837967401726, |
| "grad_norm": 0.26438847184181213, |
| "learning_rate": 9.446507142934452e-05, |
| "loss": 0.0557, |
| "step": 3680 |
| }, |
| { |
| "epoch": 3.537871524448706, |
| "grad_norm": 0.2720869183540344, |
| "learning_rate": 9.442520581975718e-05, |
| "loss": 0.0492, |
| "step": 3690 |
| }, |
| { |
| "epoch": 3.547459252157239, |
| "grad_norm": 0.3165934383869171, |
| "learning_rate": 9.438520563198328e-05, |
| "loss": 0.0512, |
| "step": 3700 |
| }, |
| { |
| "epoch": 3.557046979865772, |
| "grad_norm": 0.6523368954658508, |
| "learning_rate": 9.434507098719624e-05, |
| "loss": 0.0574, |
| "step": 3710 |
| }, |
| { |
| "epoch": 3.566634707574305, |
| "grad_norm": 0.41401076316833496, |
| "learning_rate": 9.430480200697676e-05, |
| "loss": 0.0509, |
| "step": 3720 |
| }, |
| { |
| "epoch": 3.576222435282838, |
| "grad_norm": 0.29742154479026794, |
| "learning_rate": 9.426439881331248e-05, |
| "loss": 0.0489, |
| "step": 3730 |
| }, |
| { |
| "epoch": 3.585810162991371, |
| "grad_norm": 0.40217605233192444, |
| "learning_rate": 9.422386152859763e-05, |
| "loss": 0.0466, |
| "step": 3740 |
| }, |
| { |
| "epoch": 3.5953978906999042, |
| "grad_norm": 0.3434045612812042, |
| "learning_rate": 9.418319027563263e-05, |
| "loss": 0.0575, |
| "step": 3750 |
| }, |
| { |
| "epoch": 3.6049856184084375, |
| "grad_norm": 0.6345980763435364, |
| "learning_rate": 9.414238517762373e-05, |
| "loss": 0.0453, |
| "step": 3760 |
| }, |
| { |
| "epoch": 3.6145733461169702, |
| "grad_norm": 0.43346667289733887, |
| "learning_rate": 9.410144635818266e-05, |
| "loss": 0.055, |
| "step": 3770 |
| }, |
| { |
| "epoch": 3.6241610738255035, |
| "grad_norm": 0.36115562915802, |
| "learning_rate": 9.406037394132623e-05, |
| "loss": 0.0535, |
| "step": 3780 |
| }, |
| { |
| "epoch": 3.6337488015340362, |
| "grad_norm": 0.2766103744506836, |
| "learning_rate": 9.401916805147596e-05, |
| "loss": 0.0463, |
| "step": 3790 |
| }, |
| { |
| "epoch": 3.6433365292425695, |
| "grad_norm": 0.39829254150390625, |
| "learning_rate": 9.397782881345767e-05, |
| "loss": 0.0463, |
| "step": 3800 |
| }, |
| { |
| "epoch": 3.6529242569511027, |
| "grad_norm": 0.3240996301174164, |
| "learning_rate": 9.39363563525012e-05, |
| "loss": 0.0516, |
| "step": 3810 |
| }, |
| { |
| "epoch": 3.662511984659636, |
| "grad_norm": 0.416238009929657, |
| "learning_rate": 9.389475079423988e-05, |
| "loss": 0.0483, |
| "step": 3820 |
| }, |
| { |
| "epoch": 3.6720997123681687, |
| "grad_norm": 0.24697421491146088, |
| "learning_rate": 9.385301226471032e-05, |
| "loss": 0.0451, |
| "step": 3830 |
| }, |
| { |
| "epoch": 3.681687440076702, |
| "grad_norm": 0.3078657388687134, |
| "learning_rate": 9.381114089035188e-05, |
| "loss": 0.0454, |
| "step": 3840 |
| }, |
| { |
| "epoch": 3.6912751677852347, |
| "grad_norm": 0.26055672764778137, |
| "learning_rate": 9.376913679800638e-05, |
| "loss": 0.0426, |
| "step": 3850 |
| }, |
| { |
| "epoch": 3.700862895493768, |
| "grad_norm": 0.36363962292671204, |
| "learning_rate": 9.372700011491768e-05, |
| "loss": 0.0535, |
| "step": 3860 |
| }, |
| { |
| "epoch": 3.710450623202301, |
| "grad_norm": 0.23066310584545135, |
| "learning_rate": 9.36847309687313e-05, |
| "loss": 0.0391, |
| "step": 3870 |
| }, |
| { |
| "epoch": 3.7200383509108343, |
| "grad_norm": 0.35935813188552856, |
| "learning_rate": 9.364232948749402e-05, |
| "loss": 0.0404, |
| "step": 3880 |
| }, |
| { |
| "epoch": 3.729626078619367, |
| "grad_norm": 0.42284151911735535, |
| "learning_rate": 9.359979579965352e-05, |
| "loss": 0.0456, |
| "step": 3890 |
| }, |
| { |
| "epoch": 3.7392138063279003, |
| "grad_norm": 0.29598748683929443, |
| "learning_rate": 9.355713003405797e-05, |
| "loss": 0.0486, |
| "step": 3900 |
| }, |
| { |
| "epoch": 3.748801534036433, |
| "grad_norm": 0.30880895256996155, |
| "learning_rate": 9.351433231995568e-05, |
| "loss": 0.0524, |
| "step": 3910 |
| }, |
| { |
| "epoch": 3.7583892617449663, |
| "grad_norm": 0.2683268189430237, |
| "learning_rate": 9.34714027869946e-05, |
| "loss": 0.0458, |
| "step": 3920 |
| }, |
| { |
| "epoch": 3.7679769894534996, |
| "grad_norm": 0.3789876401424408, |
| "learning_rate": 9.342834156522204e-05, |
| "loss": 0.0529, |
| "step": 3930 |
| }, |
| { |
| "epoch": 3.777564717162033, |
| "grad_norm": 0.2747150957584381, |
| "learning_rate": 9.338514878508428e-05, |
| "loss": 0.0474, |
| "step": 3940 |
| }, |
| { |
| "epoch": 3.7871524448705656, |
| "grad_norm": 0.3292723000049591, |
| "learning_rate": 9.334182457742607e-05, |
| "loss": 0.0544, |
| "step": 3950 |
| }, |
| { |
| "epoch": 3.796740172579099, |
| "grad_norm": 0.28527846932411194, |
| "learning_rate": 9.329836907349033e-05, |
| "loss": 0.0419, |
| "step": 3960 |
| }, |
| { |
| "epoch": 3.8063279002876316, |
| "grad_norm": 0.37766164541244507, |
| "learning_rate": 9.325478240491771e-05, |
| "loss": 0.0503, |
| "step": 3970 |
| }, |
| { |
| "epoch": 3.815915627996165, |
| "grad_norm": 0.4285350739955902, |
| "learning_rate": 9.321106470374618e-05, |
| "loss": 0.0493, |
| "step": 3980 |
| }, |
| { |
| "epoch": 3.825503355704698, |
| "grad_norm": 0.432804137468338, |
| "learning_rate": 9.316721610241068e-05, |
| "loss": 0.0452, |
| "step": 3990 |
| }, |
| { |
| "epoch": 3.8350910834132312, |
| "grad_norm": 0.32709524035453796, |
| "learning_rate": 9.312323673374269e-05, |
| "loss": 0.049, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.844678811121764, |
| "grad_norm": 0.2850819230079651, |
| "learning_rate": 9.30791267309698e-05, |
| "loss": 0.0379, |
| "step": 4010 |
| }, |
| { |
| "epoch": 3.8542665388302972, |
| "grad_norm": 0.3472555875778198, |
| "learning_rate": 9.303488622771535e-05, |
| "loss": 0.0412, |
| "step": 4020 |
| }, |
| { |
| "epoch": 3.8638542665388305, |
| "grad_norm": 0.545179545879364, |
| "learning_rate": 9.299051535799799e-05, |
| "loss": 0.0535, |
| "step": 4030 |
| }, |
| { |
| "epoch": 3.8734419942473632, |
| "grad_norm": 0.43045416474342346, |
| "learning_rate": 9.29460142562313e-05, |
| "loss": 0.0564, |
| "step": 4040 |
| }, |
| { |
| "epoch": 3.8830297219558965, |
| "grad_norm": 0.30958643555641174, |
| "learning_rate": 9.290138305722343e-05, |
| "loss": 0.0423, |
| "step": 4050 |
| }, |
| { |
| "epoch": 3.8926174496644297, |
| "grad_norm": 0.3504599630832672, |
| "learning_rate": 9.285662189617652e-05, |
| "loss": 0.0525, |
| "step": 4060 |
| }, |
| { |
| "epoch": 3.9022051773729625, |
| "grad_norm": 0.5074465870857239, |
| "learning_rate": 9.281173090868651e-05, |
| "loss": 0.0505, |
| "step": 4070 |
| }, |
| { |
| "epoch": 3.9117929050814957, |
| "grad_norm": 0.30970317125320435, |
| "learning_rate": 9.27667102307426e-05, |
| "loss": 0.0404, |
| "step": 4080 |
| }, |
| { |
| "epoch": 3.921380632790029, |
| "grad_norm": 0.35298407077789307, |
| "learning_rate": 9.27215599987268e-05, |
| "loss": 0.0461, |
| "step": 4090 |
| }, |
| { |
| "epoch": 3.9309683604985617, |
| "grad_norm": 0.32086381316185, |
| "learning_rate": 9.267628034941369e-05, |
| "loss": 0.0476, |
| "step": 4100 |
| }, |
| { |
| "epoch": 3.940556088207095, |
| "grad_norm": 0.33907032012939453, |
| "learning_rate": 9.26308714199698e-05, |
| "loss": 0.0446, |
| "step": 4110 |
| }, |
| { |
| "epoch": 3.950143815915628, |
| "grad_norm": 0.23291510343551636, |
| "learning_rate": 9.258533334795336e-05, |
| "loss": 0.0542, |
| "step": 4120 |
| }, |
| { |
| "epoch": 3.959731543624161, |
| "grad_norm": 0.3786979913711548, |
| "learning_rate": 9.253966627131379e-05, |
| "loss": 0.049, |
| "step": 4130 |
| }, |
| { |
| "epoch": 3.969319271332694, |
| "grad_norm": 0.4073876142501831, |
| "learning_rate": 9.249387032839125e-05, |
| "loss": 0.046, |
| "step": 4140 |
| }, |
| { |
| "epoch": 3.9789069990412274, |
| "grad_norm": 0.3822251856327057, |
| "learning_rate": 9.244794565791639e-05, |
| "loss": 0.0472, |
| "step": 4150 |
| }, |
| { |
| "epoch": 3.98849472674976, |
| "grad_norm": 0.43598631024360657, |
| "learning_rate": 9.240189239900972e-05, |
| "loss": 0.0388, |
| "step": 4160 |
| }, |
| { |
| "epoch": 3.9980824544582934, |
| "grad_norm": 0.2129432111978531, |
| "learning_rate": 9.235571069118131e-05, |
| "loss": 0.0492, |
| "step": 4170 |
| }, |
| { |
| "epoch": 4.007670182166827, |
| "grad_norm": 0.3745039999485016, |
| "learning_rate": 9.23094006743304e-05, |
| "loss": 0.0447, |
| "step": 4180 |
| }, |
| { |
| "epoch": 4.01725790987536, |
| "grad_norm": 0.3619850277900696, |
| "learning_rate": 9.226296248874482e-05, |
| "loss": 0.0523, |
| "step": 4190 |
| }, |
| { |
| "epoch": 4.026845637583893, |
| "grad_norm": 0.3835139274597168, |
| "learning_rate": 9.221639627510076e-05, |
| "loss": 0.048, |
| "step": 4200 |
| }, |
| { |
| "epoch": 4.036433365292425, |
| "grad_norm": 0.28674259781837463, |
| "learning_rate": 9.216970217446219e-05, |
| "loss": 0.0387, |
| "step": 4210 |
| }, |
| { |
| "epoch": 4.046021093000959, |
| "grad_norm": 0.25763458013534546, |
| "learning_rate": 9.21228803282805e-05, |
| "loss": 0.0506, |
| "step": 4220 |
| }, |
| { |
| "epoch": 4.055608820709492, |
| "grad_norm": 0.36224737763404846, |
| "learning_rate": 9.207593087839406e-05, |
| "loss": 0.0453, |
| "step": 4230 |
| }, |
| { |
| "epoch": 4.065196548418025, |
| "grad_norm": 0.38200250267982483, |
| "learning_rate": 9.202885396702782e-05, |
| "loss": 0.0431, |
| "step": 4240 |
| }, |
| { |
| "epoch": 4.074784276126558, |
| "grad_norm": 0.336946964263916, |
| "learning_rate": 9.198164973679285e-05, |
| "loss": 0.0443, |
| "step": 4250 |
| }, |
| { |
| "epoch": 4.0843720038350915, |
| "grad_norm": 0.3541509807109833, |
| "learning_rate": 9.193431833068586e-05, |
| "loss": 0.0499, |
| "step": 4260 |
| }, |
| { |
| "epoch": 4.093959731543624, |
| "grad_norm": 0.3337682783603668, |
| "learning_rate": 9.188685989208886e-05, |
| "loss": 0.0474, |
| "step": 4270 |
| }, |
| { |
| "epoch": 4.103547459252157, |
| "grad_norm": 0.4774644076824188, |
| "learning_rate": 9.183927456476864e-05, |
| "loss": 0.0413, |
| "step": 4280 |
| }, |
| { |
| "epoch": 4.11313518696069, |
| "grad_norm": 0.3974810540676117, |
| "learning_rate": 9.179156249287646e-05, |
| "loss": 0.0495, |
| "step": 4290 |
| }, |
| { |
| "epoch": 4.1227229146692235, |
| "grad_norm": 0.35930874943733215, |
| "learning_rate": 9.174372382094745e-05, |
| "loss": 0.0481, |
| "step": 4300 |
| }, |
| { |
| "epoch": 4.132310642377757, |
| "grad_norm": 0.39746561646461487, |
| "learning_rate": 9.169575869390028e-05, |
| "loss": 0.0401, |
| "step": 4310 |
| }, |
| { |
| "epoch": 4.14189837008629, |
| "grad_norm": 0.3344055414199829, |
| "learning_rate": 9.164766725703669e-05, |
| "loss": 0.0471, |
| "step": 4320 |
| }, |
| { |
| "epoch": 4.151486097794822, |
| "grad_norm": 0.23866185545921326, |
| "learning_rate": 9.159944965604105e-05, |
| "loss": 0.0424, |
| "step": 4330 |
| }, |
| { |
| "epoch": 4.1610738255033555, |
| "grad_norm": 0.3230268657207489, |
| "learning_rate": 9.155110603697996e-05, |
| "loss": 0.0475, |
| "step": 4340 |
| }, |
| { |
| "epoch": 4.170661553211889, |
| "grad_norm": 0.3797110915184021, |
| "learning_rate": 9.150263654630172e-05, |
| "loss": 0.0458, |
| "step": 4350 |
| }, |
| { |
| "epoch": 4.180249280920422, |
| "grad_norm": 0.41824665665626526, |
| "learning_rate": 9.145404133083591e-05, |
| "loss": 0.0401, |
| "step": 4360 |
| }, |
| { |
| "epoch": 4.189837008628955, |
| "grad_norm": 0.45811742544174194, |
| "learning_rate": 9.140532053779307e-05, |
| "loss": 0.0533, |
| "step": 4370 |
| }, |
| { |
| "epoch": 4.199424736337488, |
| "grad_norm": 0.3115192651748657, |
| "learning_rate": 9.135647431476407e-05, |
| "loss": 0.0475, |
| "step": 4380 |
| }, |
| { |
| "epoch": 4.209012464046021, |
| "grad_norm": 0.27874428033828735, |
| "learning_rate": 9.130750280971978e-05, |
| "loss": 0.0444, |
| "step": 4390 |
| }, |
| { |
| "epoch": 4.218600191754554, |
| "grad_norm": 0.5270777940750122, |
| "learning_rate": 9.125840617101058e-05, |
| "loss": 0.0514, |
| "step": 4400 |
| }, |
| { |
| "epoch": 4.228187919463087, |
| "grad_norm": 0.40683162212371826, |
| "learning_rate": 9.120918454736593e-05, |
| "loss": 0.0472, |
| "step": 4410 |
| }, |
| { |
| "epoch": 4.23777564717162, |
| "grad_norm": 0.30064043402671814, |
| "learning_rate": 9.11598380878939e-05, |
| "loss": 0.0492, |
| "step": 4420 |
| }, |
| { |
| "epoch": 4.247363374880154, |
| "grad_norm": 0.4496791362762451, |
| "learning_rate": 9.111036694208072e-05, |
| "loss": 0.0471, |
| "step": 4430 |
| }, |
| { |
| "epoch": 4.256951102588687, |
| "grad_norm": 0.39262011647224426, |
| "learning_rate": 9.106077125979037e-05, |
| "loss": 0.0487, |
| "step": 4440 |
| }, |
| { |
| "epoch": 4.26653883029722, |
| "grad_norm": 0.34774985909461975, |
| "learning_rate": 9.101105119126405e-05, |
| "loss": 0.0452, |
| "step": 4450 |
| }, |
| { |
| "epoch": 4.276126558005752, |
| "grad_norm": 0.4597591459751129, |
| "learning_rate": 9.096120688711978e-05, |
| "loss": 0.0521, |
| "step": 4460 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "grad_norm": 0.594453752040863, |
| "learning_rate": 9.091123849835195e-05, |
| "loss": 0.0555, |
| "step": 4470 |
| }, |
| { |
| "epoch": 4.295302013422819, |
| "grad_norm": 0.45329248905181885, |
| "learning_rate": 9.086114617633079e-05, |
| "loss": 0.0408, |
| "step": 4480 |
| }, |
| { |
| "epoch": 4.304889741131352, |
| "grad_norm": 0.34534817934036255, |
| "learning_rate": 9.081093007280205e-05, |
| "loss": 0.0554, |
| "step": 4490 |
| }, |
| { |
| "epoch": 4.314477468839885, |
| "grad_norm": 0.36244168877601624, |
| "learning_rate": 9.076059033988636e-05, |
| "loss": 0.0487, |
| "step": 4500 |
| }, |
| { |
| "epoch": 4.324065196548418, |
| "grad_norm": 0.32668572664260864, |
| "learning_rate": 9.071012713007892e-05, |
| "loss": 0.0483, |
| "step": 4510 |
| }, |
| { |
| "epoch": 4.333652924256951, |
| "grad_norm": 0.31663575768470764, |
| "learning_rate": 9.065954059624895e-05, |
| "loss": 0.0484, |
| "step": 4520 |
| }, |
| { |
| "epoch": 4.343240651965484, |
| "grad_norm": 0.2809025049209595, |
| "learning_rate": 9.06088308916393e-05, |
| "loss": 0.042, |
| "step": 4530 |
| }, |
| { |
| "epoch": 4.352828379674017, |
| "grad_norm": 0.2432290017604828, |
| "learning_rate": 9.05579981698659e-05, |
| "loss": 0.0463, |
| "step": 4540 |
| }, |
| { |
| "epoch": 4.3624161073825505, |
| "grad_norm": 0.2573339343070984, |
| "learning_rate": 9.050704258491736e-05, |
| "loss": 0.0462, |
| "step": 4550 |
| }, |
| { |
| "epoch": 4.372003835091084, |
| "grad_norm": 0.42221635580062866, |
| "learning_rate": 9.045596429115447e-05, |
| "loss": 0.0472, |
| "step": 4560 |
| }, |
| { |
| "epoch": 4.381591562799617, |
| "grad_norm": 0.35964876413345337, |
| "learning_rate": 9.040476344330977e-05, |
| "loss": 0.0448, |
| "step": 4570 |
| }, |
| { |
| "epoch": 4.391179290508149, |
| "grad_norm": 0.27407506108283997, |
| "learning_rate": 9.035344019648702e-05, |
| "loss": 0.0431, |
| "step": 4580 |
| }, |
| { |
| "epoch": 4.4007670182166825, |
| "grad_norm": 0.31676268577575684, |
| "learning_rate": 9.03019947061608e-05, |
| "loss": 0.0441, |
| "step": 4590 |
| }, |
| { |
| "epoch": 4.410354745925216, |
| "grad_norm": 0.2982436716556549, |
| "learning_rate": 9.025042712817598e-05, |
| "loss": 0.043, |
| "step": 4600 |
| }, |
| { |
| "epoch": 4.419942473633749, |
| "grad_norm": 0.3181396424770355, |
| "learning_rate": 9.019873761874727e-05, |
| "loss": 0.0484, |
| "step": 4610 |
| }, |
| { |
| "epoch": 4.429530201342282, |
| "grad_norm": 0.3732481002807617, |
| "learning_rate": 9.014692633445878e-05, |
| "loss": 0.055, |
| "step": 4620 |
| }, |
| { |
| "epoch": 4.439117929050815, |
| "grad_norm": 0.42074957489967346, |
| "learning_rate": 9.009499343226348e-05, |
| "loss": 0.047, |
| "step": 4630 |
| }, |
| { |
| "epoch": 4.448705656759348, |
| "grad_norm": 0.35802584886550903, |
| "learning_rate": 9.004293906948278e-05, |
| "loss": 0.0489, |
| "step": 4640 |
| }, |
| { |
| "epoch": 4.458293384467881, |
| "grad_norm": 0.33133867383003235, |
| "learning_rate": 8.999076340380603e-05, |
| "loss": 0.049, |
| "step": 4650 |
| }, |
| { |
| "epoch": 4.467881112176414, |
| "grad_norm": 0.28263920545578003, |
| "learning_rate": 8.993846659329005e-05, |
| "loss": 0.056, |
| "step": 4660 |
| }, |
| { |
| "epoch": 4.477468839884947, |
| "grad_norm": 0.5171105861663818, |
| "learning_rate": 8.988604879635862e-05, |
| "loss": 0.047, |
| "step": 4670 |
| }, |
| { |
| "epoch": 4.487056567593481, |
| "grad_norm": 0.264189749956131, |
| "learning_rate": 8.983351017180208e-05, |
| "loss": 0.0432, |
| "step": 4680 |
| }, |
| { |
| "epoch": 4.496644295302014, |
| "grad_norm": 0.2710209786891937, |
| "learning_rate": 8.978085087877672e-05, |
| "loss": 0.048, |
| "step": 4690 |
| }, |
| { |
| "epoch": 4.506232023010546, |
| "grad_norm": 0.20794712007045746, |
| "learning_rate": 8.972807107680445e-05, |
| "loss": 0.0524, |
| "step": 4700 |
| }, |
| { |
| "epoch": 4.515819750719079, |
| "grad_norm": 0.2759157419204712, |
| "learning_rate": 8.96751709257722e-05, |
| "loss": 0.0463, |
| "step": 4710 |
| }, |
| { |
| "epoch": 4.525407478427613, |
| "grad_norm": 0.45379728078842163, |
| "learning_rate": 8.962215058593146e-05, |
| "loss": 0.0483, |
| "step": 4720 |
| }, |
| { |
| "epoch": 4.534995206136146, |
| "grad_norm": 0.35511714220046997, |
| "learning_rate": 8.956901021789785e-05, |
| "loss": 0.0473, |
| "step": 4730 |
| }, |
| { |
| "epoch": 4.544582933844679, |
| "grad_norm": 0.49189603328704834, |
| "learning_rate": 8.951574998265058e-05, |
| "loss": 0.0448, |
| "step": 4740 |
| }, |
| { |
| "epoch": 4.554170661553212, |
| "grad_norm": 0.7247273921966553, |
| "learning_rate": 8.946237004153197e-05, |
| "loss": 0.0514, |
| "step": 4750 |
| }, |
| { |
| "epoch": 4.563758389261745, |
| "grad_norm": 0.5640259385108948, |
| "learning_rate": 8.940887055624696e-05, |
| "loss": 0.0495, |
| "step": 4760 |
| }, |
| { |
| "epoch": 4.573346116970278, |
| "grad_norm": 0.9589868187904358, |
| "learning_rate": 8.935525168886262e-05, |
| "loss": 0.0497, |
| "step": 4770 |
| }, |
| { |
| "epoch": 4.582933844678811, |
| "grad_norm": 0.24826788902282715, |
| "learning_rate": 8.930151360180773e-05, |
| "loss": 0.0526, |
| "step": 4780 |
| }, |
| { |
| "epoch": 4.592521572387344, |
| "grad_norm": 0.4066452980041504, |
| "learning_rate": 8.924765645787216e-05, |
| "loss": 0.0482, |
| "step": 4790 |
| }, |
| { |
| "epoch": 4.6021093000958775, |
| "grad_norm": 0.41626861691474915, |
| "learning_rate": 8.919368042020645e-05, |
| "loss": 0.0469, |
| "step": 4800 |
| }, |
| { |
| "epoch": 4.611697027804411, |
| "grad_norm": 0.35766589641571045, |
| "learning_rate": 8.913958565232132e-05, |
| "loss": 0.0489, |
| "step": 4810 |
| }, |
| { |
| "epoch": 4.621284755512944, |
| "grad_norm": 0.24869422614574432, |
| "learning_rate": 8.908537231808716e-05, |
| "loss": 0.043, |
| "step": 4820 |
| }, |
| { |
| "epoch": 4.630872483221476, |
| "grad_norm": 0.3498132526874542, |
| "learning_rate": 8.903104058173354e-05, |
| "loss": 0.044, |
| "step": 4830 |
| }, |
| { |
| "epoch": 4.6404602109300095, |
| "grad_norm": 0.5257985591888428, |
| "learning_rate": 8.897659060784869e-05, |
| "loss": 0.0487, |
| "step": 4840 |
| }, |
| { |
| "epoch": 4.650047938638543, |
| "grad_norm": 0.3492990732192993, |
| "learning_rate": 8.892202256137905e-05, |
| "loss": 0.0516, |
| "step": 4850 |
| }, |
| { |
| "epoch": 4.659635666347076, |
| "grad_norm": 0.5162085294723511, |
| "learning_rate": 8.886733660762871e-05, |
| "loss": 0.0526, |
| "step": 4860 |
| }, |
| { |
| "epoch": 4.669223394055609, |
| "grad_norm": 0.3405402600765228, |
| "learning_rate": 8.881253291225895e-05, |
| "loss": 0.0449, |
| "step": 4870 |
| }, |
| { |
| "epoch": 4.6788111217641415, |
| "grad_norm": 0.4526231586933136, |
| "learning_rate": 8.875761164128772e-05, |
| "loss": 0.053, |
| "step": 4880 |
| }, |
| { |
| "epoch": 4.688398849472675, |
| "grad_norm": 0.3826616108417511, |
| "learning_rate": 8.870257296108918e-05, |
| "loss": 0.0467, |
| "step": 4890 |
| }, |
| { |
| "epoch": 4.697986577181208, |
| "grad_norm": 0.3477012813091278, |
| "learning_rate": 8.86474170383931e-05, |
| "loss": 0.0486, |
| "step": 4900 |
| }, |
| { |
| "epoch": 4.707574304889741, |
| "grad_norm": 0.2914051115512848, |
| "learning_rate": 8.859214404028447e-05, |
| "loss": 0.042, |
| "step": 4910 |
| }, |
| { |
| "epoch": 4.717162032598274, |
| "grad_norm": 0.40637078881263733, |
| "learning_rate": 8.85367541342029e-05, |
| "loss": 0.0432, |
| "step": 4920 |
| }, |
| { |
| "epoch": 4.726749760306808, |
| "grad_norm": 0.36229225993156433, |
| "learning_rate": 8.848124748794218e-05, |
| "loss": 0.0498, |
| "step": 4930 |
| }, |
| { |
| "epoch": 4.736337488015341, |
| "grad_norm": 0.33015790581703186, |
| "learning_rate": 8.842562426964974e-05, |
| "loss": 0.0441, |
| "step": 4940 |
| }, |
| { |
| "epoch": 4.745925215723873, |
| "grad_norm": 0.35154151916503906, |
| "learning_rate": 8.83698846478261e-05, |
| "loss": 0.0463, |
| "step": 4950 |
| }, |
| { |
| "epoch": 4.755512943432406, |
| "grad_norm": 0.2888050377368927, |
| "learning_rate": 8.831402879132446e-05, |
| "loss": 0.0455, |
| "step": 4960 |
| }, |
| { |
| "epoch": 4.76510067114094, |
| "grad_norm": 0.3235926628112793, |
| "learning_rate": 8.825805686935011e-05, |
| "loss": 0.0551, |
| "step": 4970 |
| }, |
| { |
| "epoch": 4.774688398849473, |
| "grad_norm": 0.44466277956962585, |
| "learning_rate": 8.820196905145997e-05, |
| "loss": 0.0476, |
| "step": 4980 |
| }, |
| { |
| "epoch": 4.784276126558006, |
| "grad_norm": 0.39051833748817444, |
| "learning_rate": 8.814576550756197e-05, |
| "loss": 0.04, |
| "step": 4990 |
| }, |
| { |
| "epoch": 4.793863854266538, |
| "grad_norm": 0.3532402813434601, |
| "learning_rate": 8.808944640791467e-05, |
| "loss": 0.0489, |
| "step": 5000 |
| }, |
| { |
| "epoch": 4.803451581975072, |
| "grad_norm": 0.34791117906570435, |
| "learning_rate": 8.803301192312667e-05, |
| "loss": 0.0466, |
| "step": 5010 |
| }, |
| { |
| "epoch": 4.813039309683605, |
| "grad_norm": 0.31138908863067627, |
| "learning_rate": 8.797646222415614e-05, |
| "loss": 0.0407, |
| "step": 5020 |
| }, |
| { |
| "epoch": 4.822627037392138, |
| "grad_norm": 0.2896534502506256, |
| "learning_rate": 8.79197974823102e-05, |
| "loss": 0.0479, |
| "step": 5030 |
| }, |
| { |
| "epoch": 4.832214765100671, |
| "grad_norm": 0.26334378123283386, |
| "learning_rate": 8.786301786924456e-05, |
| "loss": 0.0469, |
| "step": 5040 |
| }, |
| { |
| "epoch": 4.8418024928092045, |
| "grad_norm": 0.2446843832731247, |
| "learning_rate": 8.780612355696283e-05, |
| "loss": 0.0461, |
| "step": 5050 |
| }, |
| { |
| "epoch": 4.851390220517738, |
| "grad_norm": 0.2954402267932892, |
| "learning_rate": 8.774911471781613e-05, |
| "loss": 0.0472, |
| "step": 5060 |
| }, |
| { |
| "epoch": 4.86097794822627, |
| "grad_norm": 0.22677741944789886, |
| "learning_rate": 8.769199152450249e-05, |
| "loss": 0.04, |
| "step": 5070 |
| }, |
| { |
| "epoch": 4.870565675934803, |
| "grad_norm": 0.32872337102890015, |
| "learning_rate": 8.76347541500664e-05, |
| "loss": 0.0479, |
| "step": 5080 |
| }, |
| { |
| "epoch": 4.8801534036433365, |
| "grad_norm": 0.4457066059112549, |
| "learning_rate": 8.757740276789818e-05, |
| "loss": 0.0439, |
| "step": 5090 |
| }, |
| { |
| "epoch": 4.88974113135187, |
| "grad_norm": 0.24604512751102448, |
| "learning_rate": 8.751993755173358e-05, |
| "loss": 0.0468, |
| "step": 5100 |
| }, |
| { |
| "epoch": 4.899328859060403, |
| "grad_norm": 0.3143763840198517, |
| "learning_rate": 8.746235867565313e-05, |
| "loss": 0.0458, |
| "step": 5110 |
| }, |
| { |
| "epoch": 4.908916586768935, |
| "grad_norm": 0.3161276876926422, |
| "learning_rate": 8.74046663140817e-05, |
| "loss": 0.0502, |
| "step": 5120 |
| }, |
| { |
| "epoch": 4.9185043144774685, |
| "grad_norm": 0.2833130657672882, |
| "learning_rate": 8.734686064178797e-05, |
| "loss": 0.0419, |
| "step": 5130 |
| }, |
| { |
| "epoch": 4.928092042186002, |
| "grad_norm": 0.4420258104801178, |
| "learning_rate": 8.728894183388381e-05, |
| "loss": 0.0465, |
| "step": 5140 |
| }, |
| { |
| "epoch": 4.937679769894535, |
| "grad_norm": 0.353081077337265, |
| "learning_rate": 8.723091006582389e-05, |
| "loss": 0.0451, |
| "step": 5150 |
| }, |
| { |
| "epoch": 4.947267497603068, |
| "grad_norm": 0.4228033125400543, |
| "learning_rate": 8.717276551340501e-05, |
| "loss": 0.0495, |
| "step": 5160 |
| }, |
| { |
| "epoch": 4.956855225311601, |
| "grad_norm": 0.3678063452243805, |
| "learning_rate": 8.711450835276565e-05, |
| "loss": 0.0395, |
| "step": 5170 |
| }, |
| { |
| "epoch": 4.966442953020135, |
| "grad_norm": 0.4963276982307434, |
| "learning_rate": 8.705613876038543e-05, |
| "loss": 0.042, |
| "step": 5180 |
| }, |
| { |
| "epoch": 4.976030680728667, |
| "grad_norm": 0.3559805452823639, |
| "learning_rate": 8.699765691308456e-05, |
| "loss": 0.0448, |
| "step": 5190 |
| }, |
| { |
| "epoch": 4.9856184084372, |
| "grad_norm": 0.253312885761261, |
| "learning_rate": 8.69390629880233e-05, |
| "loss": 0.0539, |
| "step": 5200 |
| }, |
| { |
| "epoch": 4.995206136145733, |
| "grad_norm": 0.29010236263275146, |
| "learning_rate": 8.688035716270141e-05, |
| "loss": 0.0447, |
| "step": 5210 |
| }, |
| { |
| "epoch": 5.004793863854267, |
| "grad_norm": 0.35962191224098206, |
| "learning_rate": 8.682153961495767e-05, |
| "loss": 0.0484, |
| "step": 5220 |
| }, |
| { |
| "epoch": 5.0143815915628, |
| "grad_norm": 0.2923009395599365, |
| "learning_rate": 8.676261052296928e-05, |
| "loss": 0.0488, |
| "step": 5230 |
| }, |
| { |
| "epoch": 5.023969319271333, |
| "grad_norm": 0.33261337876319885, |
| "learning_rate": 8.670357006525131e-05, |
| "loss": 0.053, |
| "step": 5240 |
| }, |
| { |
| "epoch": 5.033557046979865, |
| "grad_norm": 0.3641784191131592, |
| "learning_rate": 8.66444184206563e-05, |
| "loss": 0.0429, |
| "step": 5250 |
| }, |
| { |
| "epoch": 5.043144774688399, |
| "grad_norm": 0.4545520544052124, |
| "learning_rate": 8.658515576837347e-05, |
| "loss": 0.0487, |
| "step": 5260 |
| }, |
| { |
| "epoch": 5.052732502396932, |
| "grad_norm": 0.3597351312637329, |
| "learning_rate": 8.652578228792841e-05, |
| "loss": 0.0571, |
| "step": 5270 |
| }, |
| { |
| "epoch": 5.062320230105465, |
| "grad_norm": 0.26271480321884155, |
| "learning_rate": 8.646629815918244e-05, |
| "loss": 0.046, |
| "step": 5280 |
| }, |
| { |
| "epoch": 5.071907957813998, |
| "grad_norm": 0.2976760268211365, |
| "learning_rate": 8.640670356233202e-05, |
| "loss": 0.049, |
| "step": 5290 |
| }, |
| { |
| "epoch": 5.0814956855225315, |
| "grad_norm": 0.3539637327194214, |
| "learning_rate": 8.634699867790832e-05, |
| "loss": 0.046, |
| "step": 5300 |
| }, |
| { |
| "epoch": 5.091083413231064, |
| "grad_norm": 0.314113587141037, |
| "learning_rate": 8.628718368677655e-05, |
| "loss": 0.0474, |
| "step": 5310 |
| }, |
| { |
| "epoch": 5.100671140939597, |
| "grad_norm": 0.3386295735836029, |
| "learning_rate": 8.622725877013549e-05, |
| "loss": 0.0438, |
| "step": 5320 |
| }, |
| { |
| "epoch": 5.11025886864813, |
| "grad_norm": 0.4622576832771301, |
| "learning_rate": 8.616722410951689e-05, |
| "loss": 0.0447, |
| "step": 5330 |
| }, |
| { |
| "epoch": 5.1198465963566635, |
| "grad_norm": 0.23671875894069672, |
| "learning_rate": 8.610707988678503e-05, |
| "loss": 0.0457, |
| "step": 5340 |
| }, |
| { |
| "epoch": 5.129434324065197, |
| "grad_norm": 0.38376542925834656, |
| "learning_rate": 8.604682628413601e-05, |
| "loss": 0.0521, |
| "step": 5350 |
| }, |
| { |
| "epoch": 5.13902205177373, |
| "grad_norm": 0.2503417432308197, |
| "learning_rate": 8.598646348409729e-05, |
| "loss": 0.0466, |
| "step": 5360 |
| }, |
| { |
| "epoch": 5.148609779482262, |
| "grad_norm": 0.33504578471183777, |
| "learning_rate": 8.592599166952718e-05, |
| "loss": 0.0499, |
| "step": 5370 |
| }, |
| { |
| "epoch": 5.1581975071907955, |
| "grad_norm": 0.2641712725162506, |
| "learning_rate": 8.586541102361414e-05, |
| "loss": 0.0471, |
| "step": 5380 |
| }, |
| { |
| "epoch": 5.167785234899329, |
| "grad_norm": 0.363615483045578, |
| "learning_rate": 8.580472172987638e-05, |
| "loss": 0.0451, |
| "step": 5390 |
| }, |
| { |
| "epoch": 5.177372962607862, |
| "grad_norm": 0.29901939630508423, |
| "learning_rate": 8.574392397216123e-05, |
| "loss": 0.0472, |
| "step": 5400 |
| }, |
| { |
| "epoch": 5.186960690316395, |
| "grad_norm": 0.299882173538208, |
| "learning_rate": 8.568301793464457e-05, |
| "loss": 0.0492, |
| "step": 5410 |
| }, |
| { |
| "epoch": 5.196548418024928, |
| "grad_norm": 0.25945836305618286, |
| "learning_rate": 8.562200380183033e-05, |
| "loss": 0.0354, |
| "step": 5420 |
| }, |
| { |
| "epoch": 5.206136145733462, |
| "grad_norm": 0.39987847208976746, |
| "learning_rate": 8.556088175854984e-05, |
| "loss": 0.0367, |
| "step": 5430 |
| }, |
| { |
| "epoch": 5.215723873441994, |
| "grad_norm": 0.31205254793167114, |
| "learning_rate": 8.54996519899614e-05, |
| "loss": 0.0412, |
| "step": 5440 |
| }, |
| { |
| "epoch": 5.225311601150527, |
| "grad_norm": 0.3277497887611389, |
| "learning_rate": 8.543831468154955e-05, |
| "loss": 0.0502, |
| "step": 5450 |
| }, |
| { |
| "epoch": 5.23489932885906, |
| "grad_norm": 0.3311022222042084, |
| "learning_rate": 8.537687001912471e-05, |
| "loss": 0.0477, |
| "step": 5460 |
| }, |
| { |
| "epoch": 5.244487056567594, |
| "grad_norm": 0.42579907178878784, |
| "learning_rate": 8.531531818882241e-05, |
| "loss": 0.0509, |
| "step": 5470 |
| }, |
| { |
| "epoch": 5.254074784276127, |
| "grad_norm": 0.30724838376045227, |
| "learning_rate": 8.52536593771029e-05, |
| "loss": 0.0418, |
| "step": 5480 |
| }, |
| { |
| "epoch": 5.263662511984659, |
| "grad_norm": 0.3175548017024994, |
| "learning_rate": 8.519189377075049e-05, |
| "loss": 0.0507, |
| "step": 5490 |
| }, |
| { |
| "epoch": 5.273250239693192, |
| "grad_norm": 0.3461003601551056, |
| "learning_rate": 8.513002155687297e-05, |
| "loss": 0.0495, |
| "step": 5500 |
| }, |
| { |
| "epoch": 5.282837967401726, |
| "grad_norm": 0.27968931198120117, |
| "learning_rate": 8.50680429229011e-05, |
| "loss": 0.0424, |
| "step": 5510 |
| }, |
| { |
| "epoch": 5.292425695110259, |
| "grad_norm": 0.2532777190208435, |
| "learning_rate": 8.500595805658806e-05, |
| "loss": 0.0429, |
| "step": 5520 |
| }, |
| { |
| "epoch": 5.302013422818792, |
| "grad_norm": 0.2897396981716156, |
| "learning_rate": 8.494376714600878e-05, |
| "loss": 0.0479, |
| "step": 5530 |
| }, |
| { |
| "epoch": 5.311601150527325, |
| "grad_norm": 0.32838040590286255, |
| "learning_rate": 8.48814703795595e-05, |
| "loss": 0.0462, |
| "step": 5540 |
| }, |
| { |
| "epoch": 5.3211888782358585, |
| "grad_norm": 0.23218947649002075, |
| "learning_rate": 8.481906794595702e-05, |
| "loss": 0.038, |
| "step": 5550 |
| }, |
| { |
| "epoch": 5.330776605944391, |
| "grad_norm": 0.4271414577960968, |
| "learning_rate": 8.475656003423837e-05, |
| "loss": 0.0424, |
| "step": 5560 |
| }, |
| { |
| "epoch": 5.340364333652924, |
| "grad_norm": 0.3327130079269409, |
| "learning_rate": 8.469394683376003e-05, |
| "loss": 0.0461, |
| "step": 5570 |
| }, |
| { |
| "epoch": 5.349952061361457, |
| "grad_norm": 0.34635308384895325, |
| "learning_rate": 8.463122853419748e-05, |
| "loss": 0.0462, |
| "step": 5580 |
| }, |
| { |
| "epoch": 5.3595397890699905, |
| "grad_norm": 0.35077422857284546, |
| "learning_rate": 8.456840532554448e-05, |
| "loss": 0.0477, |
| "step": 5590 |
| }, |
| { |
| "epoch": 5.369127516778524, |
| "grad_norm": 0.44980722665786743, |
| "learning_rate": 8.450547739811275e-05, |
| "loss": 0.0423, |
| "step": 5600 |
| }, |
| { |
| "epoch": 5.378715244487057, |
| "grad_norm": 0.28166648745536804, |
| "learning_rate": 8.444244494253106e-05, |
| "loss": 0.0431, |
| "step": 5610 |
| }, |
| { |
| "epoch": 5.388302972195589, |
| "grad_norm": 0.33736804127693176, |
| "learning_rate": 8.437930814974499e-05, |
| "loss": 0.0479, |
| "step": 5620 |
| }, |
| { |
| "epoch": 5.3978906999041225, |
| "grad_norm": 0.25710147619247437, |
| "learning_rate": 8.43160672110161e-05, |
| "loss": 0.042, |
| "step": 5630 |
| }, |
| { |
| "epoch": 5.407478427612656, |
| "grad_norm": 0.29803675413131714, |
| "learning_rate": 8.425272231792148e-05, |
| "loss": 0.0488, |
| "step": 5640 |
| }, |
| { |
| "epoch": 5.417066155321189, |
| "grad_norm": 0.35298973321914673, |
| "learning_rate": 8.418927366235305e-05, |
| "loss": 0.042, |
| "step": 5650 |
| }, |
| { |
| "epoch": 5.426653883029722, |
| "grad_norm": 0.32311904430389404, |
| "learning_rate": 8.41257214365172e-05, |
| "loss": 0.0452, |
| "step": 5660 |
| }, |
| { |
| "epoch": 5.436241610738255, |
| "grad_norm": 0.38360047340393066, |
| "learning_rate": 8.406206583293394e-05, |
| "loss": 0.0572, |
| "step": 5670 |
| }, |
| { |
| "epoch": 5.445829338446788, |
| "grad_norm": 0.4456116855144501, |
| "learning_rate": 8.399830704443653e-05, |
| "loss": 0.0464, |
| "step": 5680 |
| }, |
| { |
| "epoch": 5.455417066155321, |
| "grad_norm": 0.3833318054676056, |
| "learning_rate": 8.393444526417071e-05, |
| "loss": 0.0461, |
| "step": 5690 |
| }, |
| { |
| "epoch": 5.465004793863854, |
| "grad_norm": 0.27611926198005676, |
| "learning_rate": 8.387048068559435e-05, |
| "loss": 0.0437, |
| "step": 5700 |
| }, |
| { |
| "epoch": 5.474592521572387, |
| "grad_norm": 0.3786008954048157, |
| "learning_rate": 8.380641350247665e-05, |
| "loss": 0.0477, |
| "step": 5710 |
| }, |
| { |
| "epoch": 5.484180249280921, |
| "grad_norm": 0.471384197473526, |
| "learning_rate": 8.37422439088976e-05, |
| "loss": 0.0449, |
| "step": 5720 |
| }, |
| { |
| "epoch": 5.493767976989454, |
| "grad_norm": 0.2924197018146515, |
| "learning_rate": 8.36779720992475e-05, |
| "loss": 0.0476, |
| "step": 5730 |
| }, |
| { |
| "epoch": 5.503355704697986, |
| "grad_norm": 0.24068906903266907, |
| "learning_rate": 8.361359826822625e-05, |
| "loss": 0.0477, |
| "step": 5740 |
| }, |
| { |
| "epoch": 5.512943432406519, |
| "grad_norm": 0.24523060023784637, |
| "learning_rate": 8.354912261084281e-05, |
| "loss": 0.0489, |
| "step": 5750 |
| }, |
| { |
| "epoch": 5.522531160115053, |
| "grad_norm": 0.3498481810092926, |
| "learning_rate": 8.348454532241461e-05, |
| "loss": 0.0387, |
| "step": 5760 |
| }, |
| { |
| "epoch": 5.532118887823586, |
| "grad_norm": 0.3108651340007782, |
| "learning_rate": 8.341986659856698e-05, |
| "loss": 0.0377, |
| "step": 5770 |
| }, |
| { |
| "epoch": 5.541706615532119, |
| "grad_norm": 0.3618451654911041, |
| "learning_rate": 8.335508663523248e-05, |
| "loss": 0.048, |
| "step": 5780 |
| }, |
| { |
| "epoch": 5.551294343240652, |
| "grad_norm": 0.769836962223053, |
| "learning_rate": 8.329020562865038e-05, |
| "loss": 0.0422, |
| "step": 5790 |
| }, |
| { |
| "epoch": 5.5608820709491855, |
| "grad_norm": 0.24395880103111267, |
| "learning_rate": 8.322522377536604e-05, |
| "loss": 0.0395, |
| "step": 5800 |
| }, |
| { |
| "epoch": 5.570469798657718, |
| "grad_norm": 0.5865891575813293, |
| "learning_rate": 8.316014127223033e-05, |
| "loss": 0.0565, |
| "step": 5810 |
| }, |
| { |
| "epoch": 5.580057526366251, |
| "grad_norm": 0.318808376789093, |
| "learning_rate": 8.3094958316399e-05, |
| "loss": 0.0453, |
| "step": 5820 |
| }, |
| { |
| "epoch": 5.589645254074784, |
| "grad_norm": 0.44590169191360474, |
| "learning_rate": 8.302967510533213e-05, |
| "loss": 0.0524, |
| "step": 5830 |
| }, |
| { |
| "epoch": 5.5992329817833175, |
| "grad_norm": 0.3664915859699249, |
| "learning_rate": 8.296429183679349e-05, |
| "loss": 0.0434, |
| "step": 5840 |
| }, |
| { |
| "epoch": 5.608820709491851, |
| "grad_norm": 0.34023183584213257, |
| "learning_rate": 8.289880870884995e-05, |
| "loss": 0.0595, |
| "step": 5850 |
| }, |
| { |
| "epoch": 5.618408437200383, |
| "grad_norm": 0.33271753787994385, |
| "learning_rate": 8.283322591987086e-05, |
| "loss": 0.0476, |
| "step": 5860 |
| }, |
| { |
| "epoch": 5.627996164908916, |
| "grad_norm": 0.30905163288116455, |
| "learning_rate": 8.276754366852754e-05, |
| "loss": 0.0486, |
| "step": 5870 |
| }, |
| { |
| "epoch": 5.6375838926174495, |
| "grad_norm": 0.3950500786304474, |
| "learning_rate": 8.27017621537926e-05, |
| "loss": 0.0524, |
| "step": 5880 |
| }, |
| { |
| "epoch": 5.647171620325983, |
| "grad_norm": 0.3802347481250763, |
| "learning_rate": 8.26358815749393e-05, |
| "loss": 0.0453, |
| "step": 5890 |
| }, |
| { |
| "epoch": 5.656759348034516, |
| "grad_norm": 0.27361515164375305, |
| "learning_rate": 8.256990213154102e-05, |
| "loss": 0.0426, |
| "step": 5900 |
| }, |
| { |
| "epoch": 5.666347075743049, |
| "grad_norm": 0.28120309114456177, |
| "learning_rate": 8.250382402347065e-05, |
| "loss": 0.0406, |
| "step": 5910 |
| }, |
| { |
| "epoch": 5.675934803451582, |
| "grad_norm": 0.44831210374832153, |
| "learning_rate": 8.243764745089999e-05, |
| "loss": 0.0433, |
| "step": 5920 |
| }, |
| { |
| "epoch": 5.685522531160115, |
| "grad_norm": 0.2854187488555908, |
| "learning_rate": 8.237137261429904e-05, |
| "loss": 0.0438, |
| "step": 5930 |
| }, |
| { |
| "epoch": 5.695110258868648, |
| "grad_norm": 0.3696000874042511, |
| "learning_rate": 8.230499971443555e-05, |
| "loss": 0.0399, |
| "step": 5940 |
| }, |
| { |
| "epoch": 5.704697986577181, |
| "grad_norm": 0.794933021068573, |
| "learning_rate": 8.223852895237427e-05, |
| "loss": 0.0452, |
| "step": 5950 |
| }, |
| { |
| "epoch": 5.714285714285714, |
| "grad_norm": 0.3321564793586731, |
| "learning_rate": 8.21719605294765e-05, |
| "loss": 0.0484, |
| "step": 5960 |
| }, |
| { |
| "epoch": 5.723873441994248, |
| "grad_norm": 0.29202380776405334, |
| "learning_rate": 8.210529464739928e-05, |
| "loss": 0.0432, |
| "step": 5970 |
| }, |
| { |
| "epoch": 5.73346116970278, |
| "grad_norm": 0.32877346873283386, |
| "learning_rate": 8.203853150809494e-05, |
| "loss": 0.046, |
| "step": 5980 |
| }, |
| { |
| "epoch": 5.743048897411313, |
| "grad_norm": 0.45695215463638306, |
| "learning_rate": 8.197167131381045e-05, |
| "loss": 0.0464, |
| "step": 5990 |
| }, |
| { |
| "epoch": 5.752636625119846, |
| "grad_norm": 0.20887207984924316, |
| "learning_rate": 8.190471426708675e-05, |
| "loss": 0.0428, |
| "step": 6000 |
| }, |
| { |
| "epoch": 5.76222435282838, |
| "grad_norm": 0.31597304344177246, |
| "learning_rate": 8.183766057075819e-05, |
| "loss": 0.0409, |
| "step": 6010 |
| }, |
| { |
| "epoch": 5.771812080536913, |
| "grad_norm": 0.3338216245174408, |
| "learning_rate": 8.177051042795192e-05, |
| "loss": 0.0461, |
| "step": 6020 |
| }, |
| { |
| "epoch": 5.781399808245446, |
| "grad_norm": 0.32134512066841125, |
| "learning_rate": 8.170326404208724e-05, |
| "loss": 0.0411, |
| "step": 6030 |
| }, |
| { |
| "epoch": 5.790987535953979, |
| "grad_norm": 0.2781100571155548, |
| "learning_rate": 8.163592161687499e-05, |
| "loss": 0.0425, |
| "step": 6040 |
| }, |
| { |
| "epoch": 5.800575263662512, |
| "grad_norm": 0.34772852063179016, |
| "learning_rate": 8.156848335631697e-05, |
| "loss": 0.0368, |
| "step": 6050 |
| }, |
| { |
| "epoch": 5.810162991371045, |
| "grad_norm": 0.3309897184371948, |
| "learning_rate": 8.15009494647053e-05, |
| "loss": 0.04, |
| "step": 6060 |
| }, |
| { |
| "epoch": 5.819750719079578, |
| "grad_norm": 0.252763032913208, |
| "learning_rate": 8.143332014662176e-05, |
| "loss": 0.0398, |
| "step": 6070 |
| }, |
| { |
| "epoch": 5.829338446788111, |
| "grad_norm": 0.3265877664089203, |
| "learning_rate": 8.136559560693722e-05, |
| "loss": 0.045, |
| "step": 6080 |
| }, |
| { |
| "epoch": 5.8389261744966445, |
| "grad_norm": 0.4045432209968567, |
| "learning_rate": 8.129777605081105e-05, |
| "loss": 0.0428, |
| "step": 6090 |
| }, |
| { |
| "epoch": 5.848513902205178, |
| "grad_norm": 0.2679883539676666, |
| "learning_rate": 8.12298616836904e-05, |
| "loss": 0.0433, |
| "step": 6100 |
| }, |
| { |
| "epoch": 5.85810162991371, |
| "grad_norm": 0.4409831166267395, |
| "learning_rate": 8.116185271130965e-05, |
| "loss": 0.0457, |
| "step": 6110 |
| }, |
| { |
| "epoch": 5.867689357622243, |
| "grad_norm": 0.4434974491596222, |
| "learning_rate": 8.10937493396898e-05, |
| "loss": 0.0476, |
| "step": 6120 |
| }, |
| { |
| "epoch": 5.8772770853307765, |
| "grad_norm": 0.363570898771286, |
| "learning_rate": 8.102555177513776e-05, |
| "loss": 0.0405, |
| "step": 6130 |
| }, |
| { |
| "epoch": 5.88686481303931, |
| "grad_norm": 0.31658318638801575, |
| "learning_rate": 8.095726022424583e-05, |
| "loss": 0.0434, |
| "step": 6140 |
| }, |
| { |
| "epoch": 5.896452540747843, |
| "grad_norm": 0.3343175947666168, |
| "learning_rate": 8.088887489389099e-05, |
| "loss": 0.0421, |
| "step": 6150 |
| }, |
| { |
| "epoch": 5.906040268456376, |
| "grad_norm": 0.2580268681049347, |
| "learning_rate": 8.082039599123434e-05, |
| "loss": 0.0415, |
| "step": 6160 |
| }, |
| { |
| "epoch": 5.9156279961649085, |
| "grad_norm": 0.36179137229919434, |
| "learning_rate": 8.07518237237204e-05, |
| "loss": 0.0425, |
| "step": 6170 |
| }, |
| { |
| "epoch": 5.925215723873442, |
| "grad_norm": 0.3440069556236267, |
| "learning_rate": 8.068315829907658e-05, |
| "loss": 0.0404, |
| "step": 6180 |
| }, |
| { |
| "epoch": 5.934803451581975, |
| "grad_norm": 0.39785268902778625, |
| "learning_rate": 8.061439992531241e-05, |
| "loss": 0.0425, |
| "step": 6190 |
| }, |
| { |
| "epoch": 5.944391179290508, |
| "grad_norm": 0.29912492632865906, |
| "learning_rate": 8.054554881071909e-05, |
| "loss": 0.0465, |
| "step": 6200 |
| }, |
| { |
| "epoch": 5.953978906999041, |
| "grad_norm": 0.3317604660987854, |
| "learning_rate": 8.047660516386868e-05, |
| "loss": 0.0432, |
| "step": 6210 |
| }, |
| { |
| "epoch": 5.963566634707575, |
| "grad_norm": 0.3451102077960968, |
| "learning_rate": 8.040756919361358e-05, |
| "loss": 0.0452, |
| "step": 6220 |
| }, |
| { |
| "epoch": 5.973154362416107, |
| "grad_norm": 0.3293020725250244, |
| "learning_rate": 8.03384411090859e-05, |
| "loss": 0.0367, |
| "step": 6230 |
| }, |
| { |
| "epoch": 5.98274209012464, |
| "grad_norm": 0.30293816328048706, |
| "learning_rate": 8.026922111969674e-05, |
| "loss": 0.0442, |
| "step": 6240 |
| }, |
| { |
| "epoch": 5.992329817833173, |
| "grad_norm": 0.2671773433685303, |
| "learning_rate": 8.019990943513565e-05, |
| "loss": 0.0482, |
| "step": 6250 |
| }, |
| { |
| "epoch": 6.001917545541707, |
| "grad_norm": 0.30587103962898254, |
| "learning_rate": 8.013050626536992e-05, |
| "loss": 0.054, |
| "step": 6260 |
| }, |
| { |
| "epoch": 6.01150527325024, |
| "grad_norm": 0.3319852948188782, |
| "learning_rate": 8.0061011820644e-05, |
| "loss": 0.0454, |
| "step": 6270 |
| }, |
| { |
| "epoch": 6.021093000958773, |
| "grad_norm": 0.5606246590614319, |
| "learning_rate": 7.999142631147884e-05, |
| "loss": 0.0491, |
| "step": 6280 |
| }, |
| { |
| "epoch": 6.030680728667305, |
| "grad_norm": 0.3884483873844147, |
| "learning_rate": 7.992174994867123e-05, |
| "loss": 0.0488, |
| "step": 6290 |
| }, |
| { |
| "epoch": 6.040268456375839, |
| "grad_norm": 0.30733785033226013, |
| "learning_rate": 7.985198294329324e-05, |
| "loss": 0.0434, |
| "step": 6300 |
| }, |
| { |
| "epoch": 6.049856184084372, |
| "grad_norm": 0.9947719573974609, |
| "learning_rate": 7.978212550669144e-05, |
| "loss": 0.0452, |
| "step": 6310 |
| }, |
| { |
| "epoch": 6.059443911792905, |
| "grad_norm": 0.3336857259273529, |
| "learning_rate": 7.971217785048644e-05, |
| "loss": 0.0445, |
| "step": 6320 |
| }, |
| { |
| "epoch": 6.069031639501438, |
| "grad_norm": 0.3001098930835724, |
| "learning_rate": 7.964214018657208e-05, |
| "loss": 0.042, |
| "step": 6330 |
| }, |
| { |
| "epoch": 6.0786193672099715, |
| "grad_norm": 0.32423412799835205, |
| "learning_rate": 7.957201272711492e-05, |
| "loss": 0.041, |
| "step": 6340 |
| }, |
| { |
| "epoch": 6.088207094918504, |
| "grad_norm": 0.2871480882167816, |
| "learning_rate": 7.950179568455347e-05, |
| "loss": 0.0436, |
| "step": 6350 |
| }, |
| { |
| "epoch": 6.097794822627037, |
| "grad_norm": 0.4804290533065796, |
| "learning_rate": 7.94314892715977e-05, |
| "loss": 0.0393, |
| "step": 6360 |
| }, |
| { |
| "epoch": 6.10738255033557, |
| "grad_norm": 0.459533154964447, |
| "learning_rate": 7.936109370122824e-05, |
| "loss": 0.0468, |
| "step": 6370 |
| }, |
| { |
| "epoch": 6.1169702780441035, |
| "grad_norm": 0.25455859303474426, |
| "learning_rate": 7.929060918669585e-05, |
| "loss": 0.0409, |
| "step": 6380 |
| }, |
| { |
| "epoch": 6.126558005752637, |
| "grad_norm": 0.34990832209587097, |
| "learning_rate": 7.922003594152068e-05, |
| "loss": 0.0389, |
| "step": 6390 |
| }, |
| { |
| "epoch": 6.13614573346117, |
| "grad_norm": 0.2321031242609024, |
| "learning_rate": 7.914937417949175e-05, |
| "loss": 0.0428, |
| "step": 6400 |
| }, |
| { |
| "epoch": 6.145733461169703, |
| "grad_norm": 0.3366633951663971, |
| "learning_rate": 7.907862411466616e-05, |
| "loss": 0.0417, |
| "step": 6410 |
| }, |
| { |
| "epoch": 6.1553211888782355, |
| "grad_norm": 0.3831850588321686, |
| "learning_rate": 7.900778596136855e-05, |
| "loss": 0.0409, |
| "step": 6420 |
| }, |
| { |
| "epoch": 6.164908916586769, |
| "grad_norm": 0.3772655129432678, |
| "learning_rate": 7.893685993419036e-05, |
| "loss": 0.0412, |
| "step": 6430 |
| }, |
| { |
| "epoch": 6.174496644295302, |
| "grad_norm": 0.4264662563800812, |
| "learning_rate": 7.88658462479893e-05, |
| "loss": 0.0437, |
| "step": 6440 |
| }, |
| { |
| "epoch": 6.184084372003835, |
| "grad_norm": 0.3162544369697571, |
| "learning_rate": 7.879474511788854e-05, |
| "loss": 0.0388, |
| "step": 6450 |
| }, |
| { |
| "epoch": 6.193672099712368, |
| "grad_norm": 0.34539514780044556, |
| "learning_rate": 7.872355675927623e-05, |
| "loss": 0.0416, |
| "step": 6460 |
| }, |
| { |
| "epoch": 6.203259827420902, |
| "grad_norm": 0.3206475079059601, |
| "learning_rate": 7.865228138780469e-05, |
| "loss": 0.0468, |
| "step": 6470 |
| }, |
| { |
| "epoch": 6.212847555129434, |
| "grad_norm": 0.3619016110897064, |
| "learning_rate": 7.858091921938988e-05, |
| "loss": 0.0448, |
| "step": 6480 |
| }, |
| { |
| "epoch": 6.222435282837967, |
| "grad_norm": 0.3190850615501404, |
| "learning_rate": 7.850947047021069e-05, |
| "loss": 0.0388, |
| "step": 6490 |
| }, |
| { |
| "epoch": 6.2320230105465, |
| "grad_norm": 0.3191368579864502, |
| "learning_rate": 7.843793535670827e-05, |
| "loss": 0.0449, |
| "step": 6500 |
| }, |
| { |
| "epoch": 6.241610738255034, |
| "grad_norm": 0.24938683211803436, |
| "learning_rate": 7.836631409558538e-05, |
| "loss": 0.0379, |
| "step": 6510 |
| }, |
| { |
| "epoch": 6.251198465963567, |
| "grad_norm": 0.27279171347618103, |
| "learning_rate": 7.829460690380584e-05, |
| "loss": 0.0398, |
| "step": 6520 |
| }, |
| { |
| "epoch": 6.2607861936721, |
| "grad_norm": 0.4261578917503357, |
| "learning_rate": 7.822281399859365e-05, |
| "loss": 0.0441, |
| "step": 6530 |
| }, |
| { |
| "epoch": 6.270373921380632, |
| "grad_norm": 0.3505672216415405, |
| "learning_rate": 7.815093559743256e-05, |
| "loss": 0.0464, |
| "step": 6540 |
| }, |
| { |
| "epoch": 6.279961649089166, |
| "grad_norm": 0.8695809841156006, |
| "learning_rate": 7.807897191806527e-05, |
| "loss": 0.0459, |
| "step": 6550 |
| }, |
| { |
| "epoch": 6.289549376797699, |
| "grad_norm": 0.3453594446182251, |
| "learning_rate": 7.800692317849285e-05, |
| "loss": 0.0437, |
| "step": 6560 |
| }, |
| { |
| "epoch": 6.299137104506232, |
| "grad_norm": 0.4360389709472656, |
| "learning_rate": 7.7934789596974e-05, |
| "loss": 0.0495, |
| "step": 6570 |
| }, |
| { |
| "epoch": 6.308724832214765, |
| "grad_norm": 0.4259977340698242, |
| "learning_rate": 7.786257139202447e-05, |
| "loss": 0.0486, |
| "step": 6580 |
| }, |
| { |
| "epoch": 6.3183125599232985, |
| "grad_norm": 0.4518745541572571, |
| "learning_rate": 7.779026878241635e-05, |
| "loss": 0.0455, |
| "step": 6590 |
| }, |
| { |
| "epoch": 6.327900287631831, |
| "grad_norm": 0.38590195775032043, |
| "learning_rate": 7.771788198717741e-05, |
| "loss": 0.043, |
| "step": 6600 |
| }, |
| { |
| "epoch": 6.337488015340364, |
| "grad_norm": 0.2825833559036255, |
| "learning_rate": 7.764541122559046e-05, |
| "loss": 0.0439, |
| "step": 6610 |
| }, |
| { |
| "epoch": 6.347075743048897, |
| "grad_norm": 0.364486962556839, |
| "learning_rate": 7.757285671719264e-05, |
| "loss": 0.0429, |
| "step": 6620 |
| }, |
| { |
| "epoch": 6.3566634707574305, |
| "grad_norm": 0.32037052512168884, |
| "learning_rate": 7.750021868177485e-05, |
| "loss": 0.0433, |
| "step": 6630 |
| }, |
| { |
| "epoch": 6.366251198465964, |
| "grad_norm": 0.2986597716808319, |
| "learning_rate": 7.742749733938094e-05, |
| "loss": 0.0407, |
| "step": 6640 |
| }, |
| { |
| "epoch": 6.375838926174497, |
| "grad_norm": 0.20917120575904846, |
| "learning_rate": 7.73546929103072e-05, |
| "loss": 0.0361, |
| "step": 6650 |
| }, |
| { |
| "epoch": 6.385426653883029, |
| "grad_norm": 0.3319404125213623, |
| "learning_rate": 7.728180561510155e-05, |
| "loss": 0.04, |
| "step": 6660 |
| }, |
| { |
| "epoch": 6.3950143815915625, |
| "grad_norm": 0.4171611964702606, |
| "learning_rate": 7.720883567456298e-05, |
| "loss": 0.0348, |
| "step": 6670 |
| }, |
| { |
| "epoch": 6.404602109300096, |
| "grad_norm": 0.44948673248291016, |
| "learning_rate": 7.713578330974081e-05, |
| "loss": 0.0489, |
| "step": 6680 |
| }, |
| { |
| "epoch": 6.414189837008629, |
| "grad_norm": 0.3433539569377899, |
| "learning_rate": 7.706264874193409e-05, |
| "loss": 0.038, |
| "step": 6690 |
| }, |
| { |
| "epoch": 6.423777564717162, |
| "grad_norm": 0.44886866211891174, |
| "learning_rate": 7.698943219269086e-05, |
| "loss": 0.0437, |
| "step": 6700 |
| }, |
| { |
| "epoch": 6.433365292425695, |
| "grad_norm": 0.30656543374061584, |
| "learning_rate": 7.691613388380752e-05, |
| "loss": 0.0409, |
| "step": 6710 |
| }, |
| { |
| "epoch": 6.442953020134228, |
| "grad_norm": 0.3929513692855835, |
| "learning_rate": 7.684275403732811e-05, |
| "loss": 0.0441, |
| "step": 6720 |
| }, |
| { |
| "epoch": 6.452540747842761, |
| "grad_norm": 0.44606807827949524, |
| "learning_rate": 7.676929287554372e-05, |
| "loss": 0.0457, |
| "step": 6730 |
| }, |
| { |
| "epoch": 6.462128475551294, |
| "grad_norm": 0.3216160535812378, |
| "learning_rate": 7.669575062099175e-05, |
| "loss": 0.0469, |
| "step": 6740 |
| }, |
| { |
| "epoch": 6.471716203259827, |
| "grad_norm": 0.24256640672683716, |
| "learning_rate": 7.662212749645527e-05, |
| "loss": 0.0384, |
| "step": 6750 |
| }, |
| { |
| "epoch": 6.481303930968361, |
| "grad_norm": 0.37510934472084045, |
| "learning_rate": 7.654842372496232e-05, |
| "loss": 0.0389, |
| "step": 6760 |
| }, |
| { |
| "epoch": 6.490891658676894, |
| "grad_norm": 0.3382836878299713, |
| "learning_rate": 7.647463952978524e-05, |
| "loss": 0.0448, |
| "step": 6770 |
| }, |
| { |
| "epoch": 6.500479386385427, |
| "grad_norm": 0.4976375102996826, |
| "learning_rate": 7.640077513443999e-05, |
| "loss": 0.0413, |
| "step": 6780 |
| }, |
| { |
| "epoch": 6.510067114093959, |
| "grad_norm": 0.273062527179718, |
| "learning_rate": 7.632683076268552e-05, |
| "loss": 0.0432, |
| "step": 6790 |
| }, |
| { |
| "epoch": 6.519654841802493, |
| "grad_norm": 0.34846237301826477, |
| "learning_rate": 7.625280663852301e-05, |
| "loss": 0.0501, |
| "step": 6800 |
| }, |
| { |
| "epoch": 6.529242569511026, |
| "grad_norm": 0.26076826453208923, |
| "learning_rate": 7.617870298619527e-05, |
| "loss": 0.0428, |
| "step": 6810 |
| }, |
| { |
| "epoch": 6.538830297219559, |
| "grad_norm": 0.8371449708938599, |
| "learning_rate": 7.610452003018602e-05, |
| "loss": 0.0437, |
| "step": 6820 |
| }, |
| { |
| "epoch": 6.548418024928092, |
| "grad_norm": 0.28489676117897034, |
| "learning_rate": 7.603025799521918e-05, |
| "loss": 0.0446, |
| "step": 6830 |
| }, |
| { |
| "epoch": 6.558005752636625, |
| "grad_norm": 0.3971545994281769, |
| "learning_rate": 7.595591710625829e-05, |
| "loss": 0.045, |
| "step": 6840 |
| }, |
| { |
| "epoch": 6.567593480345158, |
| "grad_norm": 0.24828213453292847, |
| "learning_rate": 7.588149758850572e-05, |
| "loss": 0.0431, |
| "step": 6850 |
| }, |
| { |
| "epoch": 6.577181208053691, |
| "grad_norm": 0.23631419241428375, |
| "learning_rate": 7.580699966740201e-05, |
| "loss": 0.0384, |
| "step": 6860 |
| }, |
| { |
| "epoch": 6.586768935762224, |
| "grad_norm": 0.3739171326160431, |
| "learning_rate": 7.57324235686253e-05, |
| "loss": 0.0513, |
| "step": 6870 |
| }, |
| { |
| "epoch": 6.5963566634707576, |
| "grad_norm": 0.29776638746261597, |
| "learning_rate": 7.565776951809043e-05, |
| "loss": 0.0437, |
| "step": 6880 |
| }, |
| { |
| "epoch": 6.605944391179291, |
| "grad_norm": 0.24786557257175446, |
| "learning_rate": 7.558303774194848e-05, |
| "loss": 0.045, |
| "step": 6890 |
| }, |
| { |
| "epoch": 6.615532118887824, |
| "grad_norm": 0.2621402442455292, |
| "learning_rate": 7.550822846658592e-05, |
| "loss": 0.036, |
| "step": 6900 |
| }, |
| { |
| "epoch": 6.625119846596356, |
| "grad_norm": 0.4778667092323303, |
| "learning_rate": 7.543334191862408e-05, |
| "loss": 0.0403, |
| "step": 6910 |
| }, |
| { |
| "epoch": 6.6347075743048896, |
| "grad_norm": 0.37852802872657776, |
| "learning_rate": 7.535837832491826e-05, |
| "loss": 0.0433, |
| "step": 6920 |
| }, |
| { |
| "epoch": 6.644295302013423, |
| "grad_norm": 0.5725548267364502, |
| "learning_rate": 7.528333791255723e-05, |
| "loss": 0.0434, |
| "step": 6930 |
| }, |
| { |
| "epoch": 6.653883029721956, |
| "grad_norm": 0.39372578263282776, |
| "learning_rate": 7.520822090886245e-05, |
| "loss": 0.0403, |
| "step": 6940 |
| }, |
| { |
| "epoch": 6.663470757430489, |
| "grad_norm": 0.2831190526485443, |
| "learning_rate": 7.513302754138741e-05, |
| "loss": 0.0424, |
| "step": 6950 |
| }, |
| { |
| "epoch": 6.673058485139022, |
| "grad_norm": 0.27865827083587646, |
| "learning_rate": 7.50577580379169e-05, |
| "loss": 0.0397, |
| "step": 6960 |
| }, |
| { |
| "epoch": 6.682646212847555, |
| "grad_norm": 0.42975571751594543, |
| "learning_rate": 7.49824126264664e-05, |
| "loss": 0.0426, |
| "step": 6970 |
| }, |
| { |
| "epoch": 6.692233940556088, |
| "grad_norm": 0.3423265218734741, |
| "learning_rate": 7.490699153528124e-05, |
| "loss": 0.045, |
| "step": 6980 |
| }, |
| { |
| "epoch": 6.701821668264621, |
| "grad_norm": 0.25411704182624817, |
| "learning_rate": 7.483149499283616e-05, |
| "loss": 0.0396, |
| "step": 6990 |
| }, |
| { |
| "epoch": 6.7114093959731544, |
| "grad_norm": 0.35409414768218994, |
| "learning_rate": 7.475592322783434e-05, |
| "loss": 0.0382, |
| "step": 7000 |
| }, |
| { |
| "epoch": 6.720997123681688, |
| "grad_norm": 0.28262168169021606, |
| "learning_rate": 7.468027646920687e-05, |
| "loss": 0.045, |
| "step": 7010 |
| }, |
| { |
| "epoch": 6.730584851390221, |
| "grad_norm": 0.4541366398334503, |
| "learning_rate": 7.460455494611206e-05, |
| "loss": 0.0389, |
| "step": 7020 |
| }, |
| { |
| "epoch": 6.740172579098753, |
| "grad_norm": 0.27586543560028076, |
| "learning_rate": 7.452875888793465e-05, |
| "loss": 0.0352, |
| "step": 7030 |
| }, |
| { |
| "epoch": 6.7497603068072864, |
| "grad_norm": 0.2681753933429718, |
| "learning_rate": 7.445288852428518e-05, |
| "loss": 0.0492, |
| "step": 7040 |
| }, |
| { |
| "epoch": 6.75934803451582, |
| "grad_norm": 0.32088425755500793, |
| "learning_rate": 7.437694408499933e-05, |
| "loss": 0.0524, |
| "step": 7050 |
| }, |
| { |
| "epoch": 6.768935762224353, |
| "grad_norm": 0.3608848452568054, |
| "learning_rate": 7.430092580013712e-05, |
| "loss": 0.0444, |
| "step": 7060 |
| }, |
| { |
| "epoch": 6.778523489932886, |
| "grad_norm": 0.2983666658401489, |
| "learning_rate": 7.42248338999823e-05, |
| "loss": 0.0484, |
| "step": 7070 |
| }, |
| { |
| "epoch": 6.788111217641419, |
| "grad_norm": 0.48037657141685486, |
| "learning_rate": 7.414866861504164e-05, |
| "loss": 0.0441, |
| "step": 7080 |
| }, |
| { |
| "epoch": 6.797698945349952, |
| "grad_norm": 0.3220434784889221, |
| "learning_rate": 7.407243017604418e-05, |
| "loss": 0.0407, |
| "step": 7090 |
| }, |
| { |
| "epoch": 6.807286673058485, |
| "grad_norm": 0.21454603970050812, |
| "learning_rate": 7.399611881394061e-05, |
| "loss": 0.0484, |
| "step": 7100 |
| }, |
| { |
| "epoch": 6.816874400767018, |
| "grad_norm": 0.3658502995967865, |
| "learning_rate": 7.391973475990247e-05, |
| "loss": 0.0471, |
| "step": 7110 |
| }, |
| { |
| "epoch": 6.826462128475551, |
| "grad_norm": 0.6076493859291077, |
| "learning_rate": 7.384327824532158e-05, |
| "loss": 0.0512, |
| "step": 7120 |
| }, |
| { |
| "epoch": 6.836049856184085, |
| "grad_norm": 0.27629798650741577, |
| "learning_rate": 7.376674950180918e-05, |
| "loss": 0.0432, |
| "step": 7130 |
| }, |
| { |
| "epoch": 6.845637583892618, |
| "grad_norm": 0.4255768954753876, |
| "learning_rate": 7.36901487611954e-05, |
| "loss": 0.042, |
| "step": 7140 |
| }, |
| { |
| "epoch": 6.855225311601151, |
| "grad_norm": 0.34027740359306335, |
| "learning_rate": 7.361347625552842e-05, |
| "loss": 0.0417, |
| "step": 7150 |
| }, |
| { |
| "epoch": 6.864813039309683, |
| "grad_norm": 0.29743191599845886, |
| "learning_rate": 7.353673221707382e-05, |
| "loss": 0.0506, |
| "step": 7160 |
| }, |
| { |
| "epoch": 6.874400767018217, |
| "grad_norm": 0.2994328439235687, |
| "learning_rate": 7.345991687831393e-05, |
| "loss": 0.042, |
| "step": 7170 |
| }, |
| { |
| "epoch": 6.88398849472675, |
| "grad_norm": 0.2891611158847809, |
| "learning_rate": 7.338303047194697e-05, |
| "loss": 0.0396, |
| "step": 7180 |
| }, |
| { |
| "epoch": 6.893576222435283, |
| "grad_norm": 0.2870160937309265, |
| "learning_rate": 7.330607323088657e-05, |
| "loss": 0.0477, |
| "step": 7190 |
| }, |
| { |
| "epoch": 6.903163950143816, |
| "grad_norm": 0.4798467457294464, |
| "learning_rate": 7.322904538826083e-05, |
| "loss": 0.0409, |
| "step": 7200 |
| }, |
| { |
| "epoch": 6.912751677852349, |
| "grad_norm": 0.30976602435112, |
| "learning_rate": 7.31519471774118e-05, |
| "loss": 0.0431, |
| "step": 7210 |
| }, |
| { |
| "epoch": 6.922339405560882, |
| "grad_norm": 0.32751721143722534, |
| "learning_rate": 7.307477883189463e-05, |
| "loss": 0.0415, |
| "step": 7220 |
| }, |
| { |
| "epoch": 6.931927133269415, |
| "grad_norm": 0.3902662992477417, |
| "learning_rate": 7.299754058547704e-05, |
| "loss": 0.0359, |
| "step": 7230 |
| }, |
| { |
| "epoch": 6.941514860977948, |
| "grad_norm": 0.21194472908973694, |
| "learning_rate": 7.292023267213835e-05, |
| "loss": 0.0409, |
| "step": 7240 |
| }, |
| { |
| "epoch": 6.9511025886864815, |
| "grad_norm": 0.28738507628440857, |
| "learning_rate": 7.284285532606906e-05, |
| "loss": 0.0433, |
| "step": 7250 |
| }, |
| { |
| "epoch": 6.960690316395015, |
| "grad_norm": 0.27712157368659973, |
| "learning_rate": 7.276540878166996e-05, |
| "loss": 0.0445, |
| "step": 7260 |
| }, |
| { |
| "epoch": 6.970278044103548, |
| "grad_norm": 0.36444854736328125, |
| "learning_rate": 7.268789327355143e-05, |
| "loss": 0.0424, |
| "step": 7270 |
| }, |
| { |
| "epoch": 6.97986577181208, |
| "grad_norm": 0.26638609170913696, |
| "learning_rate": 7.261030903653278e-05, |
| "loss": 0.0415, |
| "step": 7280 |
| }, |
| { |
| "epoch": 6.9894534995206135, |
| "grad_norm": 0.29326483607292175, |
| "learning_rate": 7.253265630564155e-05, |
| "loss": 0.0404, |
| "step": 7290 |
| }, |
| { |
| "epoch": 6.999041227229147, |
| "grad_norm": 0.563951849937439, |
| "learning_rate": 7.245493531611274e-05, |
| "loss": 0.0462, |
| "step": 7300 |
| }, |
| { |
| "epoch": 7.00862895493768, |
| "grad_norm": 0.2669621407985687, |
| "learning_rate": 7.237714630338812e-05, |
| "loss": 0.0489, |
| "step": 7310 |
| }, |
| { |
| "epoch": 7.018216682646213, |
| "grad_norm": 0.29936525225639343, |
| "learning_rate": 7.229928950311558e-05, |
| "loss": 0.042, |
| "step": 7320 |
| }, |
| { |
| "epoch": 7.027804410354746, |
| "grad_norm": 0.29611873626708984, |
| "learning_rate": 7.222136515114828e-05, |
| "loss": 0.0451, |
| "step": 7330 |
| }, |
| { |
| "epoch": 7.037392138063279, |
| "grad_norm": 0.2841253876686096, |
| "learning_rate": 7.214337348354408e-05, |
| "loss": 0.0401, |
| "step": 7340 |
| }, |
| { |
| "epoch": 7.046979865771812, |
| "grad_norm": 0.39095616340637207, |
| "learning_rate": 7.206531473656473e-05, |
| "loss": 0.0443, |
| "step": 7350 |
| }, |
| { |
| "epoch": 7.056567593480345, |
| "grad_norm": 0.3568895757198334, |
| "learning_rate": 7.19871891466752e-05, |
| "loss": 0.04, |
| "step": 7360 |
| }, |
| { |
| "epoch": 7.066155321188878, |
| "grad_norm": 0.4422648549079895, |
| "learning_rate": 7.190899695054293e-05, |
| "loss": 0.0357, |
| "step": 7370 |
| }, |
| { |
| "epoch": 7.075743048897412, |
| "grad_norm": 0.3040291965007782, |
| "learning_rate": 7.183073838503715e-05, |
| "loss": 0.0375, |
| "step": 7380 |
| }, |
| { |
| "epoch": 7.085330776605945, |
| "grad_norm": 0.3379688560962677, |
| "learning_rate": 7.175241368722812e-05, |
| "loss": 0.0441, |
| "step": 7390 |
| }, |
| { |
| "epoch": 7.094918504314477, |
| "grad_norm": 0.23404334485530853, |
| "learning_rate": 7.167402309438649e-05, |
| "loss": 0.0438, |
| "step": 7400 |
| }, |
| { |
| "epoch": 7.10450623202301, |
| "grad_norm": 0.19392350316047668, |
| "learning_rate": 7.159556684398246e-05, |
| "loss": 0.0429, |
| "step": 7410 |
| }, |
| { |
| "epoch": 7.114093959731544, |
| "grad_norm": 0.3650771975517273, |
| "learning_rate": 7.151704517368513e-05, |
| "loss": 0.0417, |
| "step": 7420 |
| }, |
| { |
| "epoch": 7.123681687440077, |
| "grad_norm": 0.3727266788482666, |
| "learning_rate": 7.143845832136188e-05, |
| "loss": 0.0381, |
| "step": 7430 |
| }, |
| { |
| "epoch": 7.13326941514861, |
| "grad_norm": 0.2589777410030365, |
| "learning_rate": 7.13598065250774e-05, |
| "loss": 0.046, |
| "step": 7440 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "grad_norm": 0.3064965009689331, |
| "learning_rate": 7.128109002309324e-05, |
| "loss": 0.0419, |
| "step": 7450 |
| }, |
| { |
| "epoch": 7.152444870565676, |
| "grad_norm": 0.3681334853172302, |
| "learning_rate": 7.120230905386688e-05, |
| "loss": 0.0456, |
| "step": 7460 |
| }, |
| { |
| "epoch": 7.162032598274209, |
| "grad_norm": 0.23908288776874542, |
| "learning_rate": 7.112346385605115e-05, |
| "loss": 0.0395, |
| "step": 7470 |
| }, |
| { |
| "epoch": 7.171620325982742, |
| "grad_norm": 0.26035764813423157, |
| "learning_rate": 7.104455466849339e-05, |
| "loss": 0.0411, |
| "step": 7480 |
| }, |
| { |
| "epoch": 7.181208053691275, |
| "grad_norm": 0.25808098912239075, |
| "learning_rate": 7.096558173023486e-05, |
| "loss": 0.0405, |
| "step": 7490 |
| }, |
| { |
| "epoch": 7.1907957813998085, |
| "grad_norm": 0.21516771614551544, |
| "learning_rate": 7.088654528050986e-05, |
| "loss": 0.0411, |
| "step": 7500 |
| }, |
| { |
| "epoch": 7.200383509108342, |
| "grad_norm": 0.27496856451034546, |
| "learning_rate": 7.080744555874517e-05, |
| "loss": 0.0332, |
| "step": 7510 |
| }, |
| { |
| "epoch": 7.209971236816874, |
| "grad_norm": 0.43999767303466797, |
| "learning_rate": 7.072828280455917e-05, |
| "loss": 0.0384, |
| "step": 7520 |
| }, |
| { |
| "epoch": 7.219558964525407, |
| "grad_norm": 0.3292781710624695, |
| "learning_rate": 7.06490572577612e-05, |
| "loss": 0.042, |
| "step": 7530 |
| }, |
| { |
| "epoch": 7.2291466922339405, |
| "grad_norm": 0.3117612600326538, |
| "learning_rate": 7.056976915835087e-05, |
| "loss": 0.0387, |
| "step": 7540 |
| }, |
| { |
| "epoch": 7.238734419942474, |
| "grad_norm": 0.2206171602010727, |
| "learning_rate": 7.049041874651722e-05, |
| "loss": 0.0362, |
| "step": 7550 |
| }, |
| { |
| "epoch": 7.248322147651007, |
| "grad_norm": 0.2644396722316742, |
| "learning_rate": 7.04110062626381e-05, |
| "loss": 0.0373, |
| "step": 7560 |
| }, |
| { |
| "epoch": 7.25790987535954, |
| "grad_norm": 0.2682825028896332, |
| "learning_rate": 7.033153194727934e-05, |
| "loss": 0.039, |
| "step": 7570 |
| }, |
| { |
| "epoch": 7.2674976030680725, |
| "grad_norm": 0.3411322832107544, |
| "learning_rate": 7.025199604119416e-05, |
| "loss": 0.0454, |
| "step": 7580 |
| }, |
| { |
| "epoch": 7.277085330776606, |
| "grad_norm": 0.3761787712574005, |
| "learning_rate": 7.017239878532227e-05, |
| "loss": 0.0379, |
| "step": 7590 |
| }, |
| { |
| "epoch": 7.286673058485139, |
| "grad_norm": 0.24610835313796997, |
| "learning_rate": 7.009274042078927e-05, |
| "loss": 0.0465, |
| "step": 7600 |
| }, |
| { |
| "epoch": 7.296260786193672, |
| "grad_norm": 0.3763638138771057, |
| "learning_rate": 7.00130211889059e-05, |
| "loss": 0.0351, |
| "step": 7610 |
| }, |
| { |
| "epoch": 7.305848513902205, |
| "grad_norm": 0.2616029679775238, |
| "learning_rate": 6.993324133116726e-05, |
| "loss": 0.039, |
| "step": 7620 |
| }, |
| { |
| "epoch": 7.315436241610739, |
| "grad_norm": 0.40914463996887207, |
| "learning_rate": 6.985340108925209e-05, |
| "loss": 0.0417, |
| "step": 7630 |
| }, |
| { |
| "epoch": 7.325023969319272, |
| "grad_norm": 0.3503078520298004, |
| "learning_rate": 6.977350070502208e-05, |
| "loss": 0.0456, |
| "step": 7640 |
| }, |
| { |
| "epoch": 7.334611697027804, |
| "grad_norm": 0.40051010251045227, |
| "learning_rate": 6.96935404205211e-05, |
| "loss": 0.047, |
| "step": 7650 |
| }, |
| { |
| "epoch": 7.344199424736337, |
| "grad_norm": 0.3985821306705475, |
| "learning_rate": 6.96135204779745e-05, |
| "loss": 0.0409, |
| "step": 7660 |
| }, |
| { |
| "epoch": 7.353787152444871, |
| "grad_norm": 0.5366324782371521, |
| "learning_rate": 6.95334411197883e-05, |
| "loss": 0.0445, |
| "step": 7670 |
| }, |
| { |
| "epoch": 7.363374880153404, |
| "grad_norm": 0.2314271628856659, |
| "learning_rate": 6.945330258854854e-05, |
| "loss": 0.0345, |
| "step": 7680 |
| }, |
| { |
| "epoch": 7.372962607861937, |
| "grad_norm": 0.24734103679656982, |
| "learning_rate": 6.937310512702056e-05, |
| "loss": 0.0354, |
| "step": 7690 |
| }, |
| { |
| "epoch": 7.382550335570469, |
| "grad_norm": 0.7746879458427429, |
| "learning_rate": 6.929284897814812e-05, |
| "loss": 0.0398, |
| "step": 7700 |
| }, |
| { |
| "epoch": 7.392138063279003, |
| "grad_norm": 0.3436695635318756, |
| "learning_rate": 6.921253438505285e-05, |
| "loss": 0.0426, |
| "step": 7710 |
| }, |
| { |
| "epoch": 7.401725790987536, |
| "grad_norm": 0.3027035593986511, |
| "learning_rate": 6.913216159103339e-05, |
| "loss": 0.0365, |
| "step": 7720 |
| }, |
| { |
| "epoch": 7.411313518696069, |
| "grad_norm": 0.23207184672355652, |
| "learning_rate": 6.905173083956468e-05, |
| "loss": 0.0397, |
| "step": 7730 |
| }, |
| { |
| "epoch": 7.420901246404602, |
| "grad_norm": 0.2601774036884308, |
| "learning_rate": 6.897124237429726e-05, |
| "loss": 0.0377, |
| "step": 7740 |
| }, |
| { |
| "epoch": 7.4304889741131355, |
| "grad_norm": 0.37864232063293457, |
| "learning_rate": 6.889069643905646e-05, |
| "loss": 0.0426, |
| "step": 7750 |
| }, |
| { |
| "epoch": 7.440076701821669, |
| "grad_norm": 0.29199257493019104, |
| "learning_rate": 6.881009327784176e-05, |
| "loss": 0.0414, |
| "step": 7760 |
| }, |
| { |
| "epoch": 7.449664429530201, |
| "grad_norm": 0.39418113231658936, |
| "learning_rate": 6.872943313482596e-05, |
| "loss": 0.04, |
| "step": 7770 |
| }, |
| { |
| "epoch": 7.459252157238734, |
| "grad_norm": 0.2868475615978241, |
| "learning_rate": 6.864871625435448e-05, |
| "loss": 0.0373, |
| "step": 7780 |
| }, |
| { |
| "epoch": 7.4688398849472675, |
| "grad_norm": 0.27719494700431824, |
| "learning_rate": 6.856794288094461e-05, |
| "loss": 0.0401, |
| "step": 7790 |
| }, |
| { |
| "epoch": 7.478427612655801, |
| "grad_norm": 0.33910930156707764, |
| "learning_rate": 6.848711325928481e-05, |
| "loss": 0.0375, |
| "step": 7800 |
| }, |
| { |
| "epoch": 7.488015340364334, |
| "grad_norm": 0.4122414290904999, |
| "learning_rate": 6.840622763423391e-05, |
| "loss": 0.0437, |
| "step": 7810 |
| }, |
| { |
| "epoch": 7.497603068072867, |
| "grad_norm": 0.2600208818912506, |
| "learning_rate": 6.832528625082036e-05, |
| "loss": 0.0418, |
| "step": 7820 |
| }, |
| { |
| "epoch": 7.5071907957813995, |
| "grad_norm": 0.27382367849349976, |
| "learning_rate": 6.824428935424158e-05, |
| "loss": 0.0512, |
| "step": 7830 |
| }, |
| { |
| "epoch": 7.516778523489933, |
| "grad_norm": 0.27426889538764954, |
| "learning_rate": 6.816323718986313e-05, |
| "loss": 0.0339, |
| "step": 7840 |
| }, |
| { |
| "epoch": 7.526366251198466, |
| "grad_norm": 0.32315194606781006, |
| "learning_rate": 6.808213000321796e-05, |
| "loss": 0.0387, |
| "step": 7850 |
| }, |
| { |
| "epoch": 7.535953978906999, |
| "grad_norm": 0.2910844683647156, |
| "learning_rate": 6.80009680400058e-05, |
| "loss": 0.0351, |
| "step": 7860 |
| }, |
| { |
| "epoch": 7.545541706615532, |
| "grad_norm": 0.3915770649909973, |
| "learning_rate": 6.791975154609216e-05, |
| "loss": 0.0439, |
| "step": 7870 |
| }, |
| { |
| "epoch": 7.555129434324066, |
| "grad_norm": 0.2871047258377075, |
| "learning_rate": 6.78384807675079e-05, |
| "loss": 0.039, |
| "step": 7880 |
| }, |
| { |
| "epoch": 7.564717162032598, |
| "grad_norm": 0.3511698544025421, |
| "learning_rate": 6.775715595044822e-05, |
| "loss": 0.039, |
| "step": 7890 |
| }, |
| { |
| "epoch": 7.574304889741131, |
| "grad_norm": 0.23974575102329254, |
| "learning_rate": 6.767577734127209e-05, |
| "loss": 0.0438, |
| "step": 7900 |
| }, |
| { |
| "epoch": 7.583892617449664, |
| "grad_norm": 0.21983303129673004, |
| "learning_rate": 6.759434518650133e-05, |
| "loss": 0.043, |
| "step": 7910 |
| }, |
| { |
| "epoch": 7.593480345158198, |
| "grad_norm": 0.2729918658733368, |
| "learning_rate": 6.75128597328201e-05, |
| "loss": 0.0423, |
| "step": 7920 |
| }, |
| { |
| "epoch": 7.603068072866731, |
| "grad_norm": 0.34236469864845276, |
| "learning_rate": 6.743132122707394e-05, |
| "loss": 0.0443, |
| "step": 7930 |
| }, |
| { |
| "epoch": 7.612655800575264, |
| "grad_norm": 0.24948126077651978, |
| "learning_rate": 6.73497299162691e-05, |
| "loss": 0.037, |
| "step": 7940 |
| }, |
| { |
| "epoch": 7.622243528283796, |
| "grad_norm": 0.3250608444213867, |
| "learning_rate": 6.726808604757184e-05, |
| "loss": 0.0476, |
| "step": 7950 |
| }, |
| { |
| "epoch": 7.63183125599233, |
| "grad_norm": 0.2713163495063782, |
| "learning_rate": 6.718638986830758e-05, |
| "loss": 0.0391, |
| "step": 7960 |
| }, |
| { |
| "epoch": 7.641418983700863, |
| "grad_norm": 0.3012318015098572, |
| "learning_rate": 6.710464162596023e-05, |
| "loss": 0.0445, |
| "step": 7970 |
| }, |
| { |
| "epoch": 7.651006711409396, |
| "grad_norm": 0.4039930999279022, |
| "learning_rate": 6.702284156817143e-05, |
| "loss": 0.045, |
| "step": 7980 |
| }, |
| { |
| "epoch": 7.660594439117929, |
| "grad_norm": 0.22321514785289764, |
| "learning_rate": 6.694098994273977e-05, |
| "loss": 0.0395, |
| "step": 7990 |
| }, |
| { |
| "epoch": 7.6701821668264625, |
| "grad_norm": 0.3009647727012634, |
| "learning_rate": 6.685908699762002e-05, |
| "loss": 0.0425, |
| "step": 8000 |
| }, |
| { |
| "epoch": 7.679769894534996, |
| "grad_norm": 0.23675967752933502, |
| "learning_rate": 6.677713298092251e-05, |
| "loss": 0.043, |
| "step": 8010 |
| }, |
| { |
| "epoch": 7.689357622243528, |
| "grad_norm": 0.3453296422958374, |
| "learning_rate": 6.669512814091219e-05, |
| "loss": 0.0402, |
| "step": 8020 |
| }, |
| { |
| "epoch": 7.698945349952061, |
| "grad_norm": 0.35849177837371826, |
| "learning_rate": 6.6613072726008e-05, |
| "loss": 0.0412, |
| "step": 8030 |
| }, |
| { |
| "epoch": 7.7085330776605945, |
| "grad_norm": 0.2602018117904663, |
| "learning_rate": 6.65309669847821e-05, |
| "loss": 0.0456, |
| "step": 8040 |
| }, |
| { |
| "epoch": 7.718120805369128, |
| "grad_norm": 0.296563059091568, |
| "learning_rate": 6.64488111659591e-05, |
| "loss": 0.0354, |
| "step": 8050 |
| }, |
| { |
| "epoch": 7.727708533077661, |
| "grad_norm": 0.2529861629009247, |
| "learning_rate": 6.636660551841527e-05, |
| "loss": 0.046, |
| "step": 8060 |
| }, |
| { |
| "epoch": 7.737296260786193, |
| "grad_norm": 0.3589211404323578, |
| "learning_rate": 6.62843502911779e-05, |
| "loss": 0.0486, |
| "step": 8070 |
| }, |
| { |
| "epoch": 7.7468839884947265, |
| "grad_norm": 0.28562942147254944, |
| "learning_rate": 6.620204573342444e-05, |
| "loss": 0.04, |
| "step": 8080 |
| }, |
| { |
| "epoch": 7.75647171620326, |
| "grad_norm": 0.42662665247917175, |
| "learning_rate": 6.611969209448175e-05, |
| "loss": 0.0417, |
| "step": 8090 |
| }, |
| { |
| "epoch": 7.766059443911793, |
| "grad_norm": 0.3339911997318268, |
| "learning_rate": 6.603728962382542e-05, |
| "loss": 0.0344, |
| "step": 8100 |
| }, |
| { |
| "epoch": 7.775647171620326, |
| "grad_norm": 0.5838896632194519, |
| "learning_rate": 6.595483857107891e-05, |
| "loss": 0.0371, |
| "step": 8110 |
| }, |
| { |
| "epoch": 7.785234899328859, |
| "grad_norm": 0.30259743332862854, |
| "learning_rate": 6.587233918601292e-05, |
| "loss": 0.0392, |
| "step": 8120 |
| }, |
| { |
| "epoch": 7.794822627037393, |
| "grad_norm": 0.4095616340637207, |
| "learning_rate": 6.578979171854449e-05, |
| "loss": 0.034, |
| "step": 8130 |
| }, |
| { |
| "epoch": 7.804410354745925, |
| "grad_norm": 0.4089941084384918, |
| "learning_rate": 6.570719641873639e-05, |
| "loss": 0.0432, |
| "step": 8140 |
| }, |
| { |
| "epoch": 7.813998082454458, |
| "grad_norm": 0.22477275133132935, |
| "learning_rate": 6.562455353679624e-05, |
| "loss": 0.0482, |
| "step": 8150 |
| }, |
| { |
| "epoch": 7.823585810162991, |
| "grad_norm": 0.24884644150733948, |
| "learning_rate": 6.554186332307583e-05, |
| "loss": 0.0357, |
| "step": 8160 |
| }, |
| { |
| "epoch": 7.833173537871525, |
| "grad_norm": 0.40433716773986816, |
| "learning_rate": 6.545912602807029e-05, |
| "loss": 0.0393, |
| "step": 8170 |
| }, |
| { |
| "epoch": 7.842761265580058, |
| "grad_norm": 0.1963358074426651, |
| "learning_rate": 6.537634190241742e-05, |
| "loss": 0.0369, |
| "step": 8180 |
| }, |
| { |
| "epoch": 7.85234899328859, |
| "grad_norm": 0.30618107318878174, |
| "learning_rate": 6.529351119689688e-05, |
| "loss": 0.0365, |
| "step": 8190 |
| }, |
| { |
| "epoch": 7.861936720997123, |
| "grad_norm": 0.9213468432426453, |
| "learning_rate": 6.52106341624294e-05, |
| "loss": 0.0415, |
| "step": 8200 |
| }, |
| { |
| "epoch": 7.871524448705657, |
| "grad_norm": 0.41490432620048523, |
| "learning_rate": 6.512771105007609e-05, |
| "loss": 0.0432, |
| "step": 8210 |
| }, |
| { |
| "epoch": 7.88111217641419, |
| "grad_norm": 0.3433400094509125, |
| "learning_rate": 6.504474211103766e-05, |
| "loss": 0.0383, |
| "step": 8220 |
| }, |
| { |
| "epoch": 7.890699904122723, |
| "grad_norm": 0.2565036714076996, |
| "learning_rate": 6.496172759665357e-05, |
| "loss": 0.039, |
| "step": 8230 |
| }, |
| { |
| "epoch": 7.900287631831256, |
| "grad_norm": 0.36820822954177856, |
| "learning_rate": 6.487866775840141e-05, |
| "loss": 0.0373, |
| "step": 8240 |
| }, |
| { |
| "epoch": 7.9098753595397895, |
| "grad_norm": 0.26671302318573, |
| "learning_rate": 6.479556284789608e-05, |
| "loss": 0.0339, |
| "step": 8250 |
| }, |
| { |
| "epoch": 7.919463087248322, |
| "grad_norm": 0.3026654124259949, |
| "learning_rate": 6.471241311688894e-05, |
| "loss": 0.0363, |
| "step": 8260 |
| }, |
| { |
| "epoch": 7.929050814956855, |
| "grad_norm": 0.24896202981472015, |
| "learning_rate": 6.46292188172672e-05, |
| "loss": 0.0394, |
| "step": 8270 |
| }, |
| { |
| "epoch": 7.938638542665388, |
| "grad_norm": 0.3126719892024994, |
| "learning_rate": 6.454598020105306e-05, |
| "loss": 0.0439, |
| "step": 8280 |
| }, |
| { |
| "epoch": 7.9482262703739215, |
| "grad_norm": 0.33165302872657776, |
| "learning_rate": 6.446269752040295e-05, |
| "loss": 0.0393, |
| "step": 8290 |
| }, |
| { |
| "epoch": 7.957813998082455, |
| "grad_norm": 0.6648756265640259, |
| "learning_rate": 6.437937102760682e-05, |
| "loss": 0.0356, |
| "step": 8300 |
| }, |
| { |
| "epoch": 7.967401725790987, |
| "grad_norm": 0.24022682011127472, |
| "learning_rate": 6.429600097508732e-05, |
| "loss": 0.0406, |
| "step": 8310 |
| }, |
| { |
| "epoch": 7.97698945349952, |
| "grad_norm": 1.2279690504074097, |
| "learning_rate": 6.421258761539904e-05, |
| "loss": 0.0434, |
| "step": 8320 |
| }, |
| { |
| "epoch": 7.9865771812080535, |
| "grad_norm": 0.2868311107158661, |
| "learning_rate": 6.412913120122779e-05, |
| "loss": 0.0372, |
| "step": 8330 |
| }, |
| { |
| "epoch": 7.996164908916587, |
| "grad_norm": 0.25136950612068176, |
| "learning_rate": 6.40456319853898e-05, |
| "loss": 0.0405, |
| "step": 8340 |
| }, |
| { |
| "epoch": 8.00575263662512, |
| "grad_norm": 0.3662584722042084, |
| "learning_rate": 6.396209022083098e-05, |
| "loss": 0.041, |
| "step": 8350 |
| }, |
| { |
| "epoch": 8.015340364333653, |
| "grad_norm": 0.3134470283985138, |
| "learning_rate": 6.387850616062605e-05, |
| "loss": 0.0357, |
| "step": 8360 |
| }, |
| { |
| "epoch": 8.024928092042186, |
| "grad_norm": 0.3947703540325165, |
| "learning_rate": 6.379488005797797e-05, |
| "loss": 0.0384, |
| "step": 8370 |
| }, |
| { |
| "epoch": 8.03451581975072, |
| "grad_norm": 0.3272991478443146, |
| "learning_rate": 6.371121216621698e-05, |
| "loss": 0.0392, |
| "step": 8380 |
| }, |
| { |
| "epoch": 8.044103547459253, |
| "grad_norm": 1.1089465618133545, |
| "learning_rate": 6.362750273879996e-05, |
| "loss": 0.047, |
| "step": 8390 |
| }, |
| { |
| "epoch": 8.053691275167786, |
| "grad_norm": 0.2133249044418335, |
| "learning_rate": 6.354375202930958e-05, |
| "loss": 0.0333, |
| "step": 8400 |
| }, |
| { |
| "epoch": 8.063279002876317, |
| "grad_norm": 0.3814240097999573, |
| "learning_rate": 6.345996029145356e-05, |
| "loss": 0.0419, |
| "step": 8410 |
| }, |
| { |
| "epoch": 8.07286673058485, |
| "grad_norm": 0.38257062435150146, |
| "learning_rate": 6.337612777906398e-05, |
| "loss": 0.0412, |
| "step": 8420 |
| }, |
| { |
| "epoch": 8.082454458293384, |
| "grad_norm": 0.20826545357704163, |
| "learning_rate": 6.329225474609633e-05, |
| "loss": 0.0402, |
| "step": 8430 |
| }, |
| { |
| "epoch": 8.092042186001917, |
| "grad_norm": 0.2289332151412964, |
| "learning_rate": 6.320834144662897e-05, |
| "loss": 0.0392, |
| "step": 8440 |
| }, |
| { |
| "epoch": 8.10162991371045, |
| "grad_norm": 0.29565075039863586, |
| "learning_rate": 6.312438813486211e-05, |
| "loss": 0.0347, |
| "step": 8450 |
| }, |
| { |
| "epoch": 8.111217641418984, |
| "grad_norm": 0.21872690320014954, |
| "learning_rate": 6.30403950651173e-05, |
| "loss": 0.0357, |
| "step": 8460 |
| }, |
| { |
| "epoch": 8.120805369127517, |
| "grad_norm": 0.24760524928569794, |
| "learning_rate": 6.295636249183643e-05, |
| "loss": 0.0331, |
| "step": 8470 |
| }, |
| { |
| "epoch": 8.13039309683605, |
| "grad_norm": 0.2806303799152374, |
| "learning_rate": 6.287229066958113e-05, |
| "loss": 0.0393, |
| "step": 8480 |
| }, |
| { |
| "epoch": 8.139980824544583, |
| "grad_norm": 0.45841529965400696, |
| "learning_rate": 6.278817985303184e-05, |
| "loss": 0.0434, |
| "step": 8490 |
| }, |
| { |
| "epoch": 8.149568552253116, |
| "grad_norm": 0.21284928917884827, |
| "learning_rate": 6.270403029698722e-05, |
| "loss": 0.0311, |
| "step": 8500 |
| }, |
| { |
| "epoch": 8.15915627996165, |
| "grad_norm": 0.312191367149353, |
| "learning_rate": 6.261984225636324e-05, |
| "loss": 0.0409, |
| "step": 8510 |
| }, |
| { |
| "epoch": 8.168744007670183, |
| "grad_norm": 0.38339605927467346, |
| "learning_rate": 6.253561598619247e-05, |
| "loss": 0.0367, |
| "step": 8520 |
| }, |
| { |
| "epoch": 8.178331735378714, |
| "grad_norm": 0.24168361723423004, |
| "learning_rate": 6.245135174162323e-05, |
| "loss": 0.0419, |
| "step": 8530 |
| }, |
| { |
| "epoch": 8.187919463087248, |
| "grad_norm": 0.3038835823535919, |
| "learning_rate": 6.236704977791898e-05, |
| "loss": 0.0349, |
| "step": 8540 |
| }, |
| { |
| "epoch": 8.19750719079578, |
| "grad_norm": 0.32537156343460083, |
| "learning_rate": 6.228271035045735e-05, |
| "loss": 0.0347, |
| "step": 8550 |
| }, |
| { |
| "epoch": 8.207094918504314, |
| "grad_norm": 0.2789401412010193, |
| "learning_rate": 6.21983337147295e-05, |
| "loss": 0.0339, |
| "step": 8560 |
| }, |
| { |
| "epoch": 8.216682646212847, |
| "grad_norm": 0.4282236397266388, |
| "learning_rate": 6.211392012633932e-05, |
| "loss": 0.0352, |
| "step": 8570 |
| }, |
| { |
| "epoch": 8.22627037392138, |
| "grad_norm": 0.3608817458152771, |
| "learning_rate": 6.202946984100261e-05, |
| "loss": 0.0373, |
| "step": 8580 |
| }, |
| { |
| "epoch": 8.235858101629914, |
| "grad_norm": 0.29480835795402527, |
| "learning_rate": 6.194498311454636e-05, |
| "loss": 0.0321, |
| "step": 8590 |
| }, |
| { |
| "epoch": 8.245445829338447, |
| "grad_norm": 0.27964943647384644, |
| "learning_rate": 6.186046020290792e-05, |
| "loss": 0.0428, |
| "step": 8600 |
| }, |
| { |
| "epoch": 8.25503355704698, |
| "grad_norm": 0.2138575315475464, |
| "learning_rate": 6.177590136213429e-05, |
| "loss": 0.0344, |
| "step": 8610 |
| }, |
| { |
| "epoch": 8.264621284755513, |
| "grad_norm": 0.3693723678588867, |
| "learning_rate": 6.169130684838132e-05, |
| "loss": 0.0449, |
| "step": 8620 |
| }, |
| { |
| "epoch": 8.274209012464047, |
| "grad_norm": 0.24271826446056366, |
| "learning_rate": 6.160667691791287e-05, |
| "loss": 0.0414, |
| "step": 8630 |
| }, |
| { |
| "epoch": 8.28379674017258, |
| "grad_norm": 0.27349698543548584, |
| "learning_rate": 6.152201182710016e-05, |
| "loss": 0.0437, |
| "step": 8640 |
| }, |
| { |
| "epoch": 8.293384467881111, |
| "grad_norm": 0.265661358833313, |
| "learning_rate": 6.143731183242085e-05, |
| "loss": 0.0402, |
| "step": 8650 |
| }, |
| { |
| "epoch": 8.302972195589644, |
| "grad_norm": 0.3084318935871124, |
| "learning_rate": 6.13525771904584e-05, |
| "loss": 0.0424, |
| "step": 8660 |
| }, |
| { |
| "epoch": 8.312559923298178, |
| "grad_norm": 0.42005741596221924, |
| "learning_rate": 6.126780815790116e-05, |
| "loss": 0.0386, |
| "step": 8670 |
| }, |
| { |
| "epoch": 8.322147651006711, |
| "grad_norm": 0.349277526140213, |
| "learning_rate": 6.118300499154174e-05, |
| "loss": 0.0355, |
| "step": 8680 |
| }, |
| { |
| "epoch": 8.331735378715244, |
| "grad_norm": 0.3930281102657318, |
| "learning_rate": 6.109816794827607e-05, |
| "loss": 0.0386, |
| "step": 8690 |
| }, |
| { |
| "epoch": 8.341323106423777, |
| "grad_norm": 0.2631587088108063, |
| "learning_rate": 6.101329728510278e-05, |
| "loss": 0.0376, |
| "step": 8700 |
| }, |
| { |
| "epoch": 8.35091083413231, |
| "grad_norm": 0.3070177137851715, |
| "learning_rate": 6.0928393259122285e-05, |
| "loss": 0.039, |
| "step": 8710 |
| }, |
| { |
| "epoch": 8.360498561840844, |
| "grad_norm": 0.3494318425655365, |
| "learning_rate": 6.084345612753611e-05, |
| "loss": 0.0405, |
| "step": 8720 |
| }, |
| { |
| "epoch": 8.370086289549377, |
| "grad_norm": 0.2996184825897217, |
| "learning_rate": 6.0758486147646035e-05, |
| "loss": 0.0386, |
| "step": 8730 |
| }, |
| { |
| "epoch": 8.37967401725791, |
| "grad_norm": 0.39091756939888, |
| "learning_rate": 6.0673483576853365e-05, |
| "loss": 0.038, |
| "step": 8740 |
| }, |
| { |
| "epoch": 8.389261744966444, |
| "grad_norm": 0.28855571150779724, |
| "learning_rate": 6.0588448672658125e-05, |
| "loss": 0.0403, |
| "step": 8750 |
| }, |
| { |
| "epoch": 8.398849472674977, |
| "grad_norm": 0.25725746154785156, |
| "learning_rate": 6.05033816926583e-05, |
| "loss": 0.0338, |
| "step": 8760 |
| }, |
| { |
| "epoch": 8.40843720038351, |
| "grad_norm": 0.2737105190753937, |
| "learning_rate": 6.041828289454903e-05, |
| "loss": 0.0417, |
| "step": 8770 |
| }, |
| { |
| "epoch": 8.418024928092041, |
| "grad_norm": 0.3197145462036133, |
| "learning_rate": 6.033315253612186e-05, |
| "loss": 0.0428, |
| "step": 8780 |
| }, |
| { |
| "epoch": 8.427612655800575, |
| "grad_norm": 0.35713446140289307, |
| "learning_rate": 6.0247990875263914e-05, |
| "loss": 0.0376, |
| "step": 8790 |
| }, |
| { |
| "epoch": 8.437200383509108, |
| "grad_norm": 0.354390949010849, |
| "learning_rate": 6.016279816995718e-05, |
| "loss": 0.0384, |
| "step": 8800 |
| }, |
| { |
| "epoch": 8.446788111217641, |
| "grad_norm": 0.31738895177841187, |
| "learning_rate": 6.0077574678277636e-05, |
| "loss": 0.048, |
| "step": 8810 |
| }, |
| { |
| "epoch": 8.456375838926174, |
| "grad_norm": 0.28505873680114746, |
| "learning_rate": 5.999232065839456e-05, |
| "loss": 0.0353, |
| "step": 8820 |
| }, |
| { |
| "epoch": 8.465963566634708, |
| "grad_norm": 0.3551139831542969, |
| "learning_rate": 5.990703636856974e-05, |
| "loss": 0.0422, |
| "step": 8830 |
| }, |
| { |
| "epoch": 8.47555129434324, |
| "grad_norm": 0.23753251135349274, |
| "learning_rate": 5.982172206715656e-05, |
| "loss": 0.0356, |
| "step": 8840 |
| }, |
| { |
| "epoch": 8.485139022051774, |
| "grad_norm": 0.3025340735912323, |
| "learning_rate": 5.973637801259944e-05, |
| "loss": 0.0416, |
| "step": 8850 |
| }, |
| { |
| "epoch": 8.494726749760307, |
| "grad_norm": 0.3358081579208374, |
| "learning_rate": 5.9651004463432826e-05, |
| "loss": 0.0406, |
| "step": 8860 |
| }, |
| { |
| "epoch": 8.50431447746884, |
| "grad_norm": 0.2748364508152008, |
| "learning_rate": 5.95656016782806e-05, |
| "loss": 0.0355, |
| "step": 8870 |
| }, |
| { |
| "epoch": 8.513902205177374, |
| "grad_norm": 0.27150842547416687, |
| "learning_rate": 5.948016991585514e-05, |
| "loss": 0.0356, |
| "step": 8880 |
| }, |
| { |
| "epoch": 8.523489932885907, |
| "grad_norm": 0.2812124490737915, |
| "learning_rate": 5.9394709434956664e-05, |
| "loss": 0.0419, |
| "step": 8890 |
| }, |
| { |
| "epoch": 8.53307766059444, |
| "grad_norm": 0.29283568263053894, |
| "learning_rate": 5.9309220494472314e-05, |
| "loss": 0.0408, |
| "step": 8900 |
| }, |
| { |
| "epoch": 8.542665388302972, |
| "grad_norm": 0.4069705605506897, |
| "learning_rate": 5.9223703353375534e-05, |
| "loss": 0.0425, |
| "step": 8910 |
| }, |
| { |
| "epoch": 8.552253116011505, |
| "grad_norm": 0.2776540219783783, |
| "learning_rate": 5.913815827072513e-05, |
| "loss": 0.0365, |
| "step": 8920 |
| }, |
| { |
| "epoch": 8.561840843720038, |
| "grad_norm": 0.2777857482433319, |
| "learning_rate": 5.905258550566458e-05, |
| "loss": 0.0368, |
| "step": 8930 |
| }, |
| { |
| "epoch": 8.571428571428571, |
| "grad_norm": 0.3018902838230133, |
| "learning_rate": 5.896698531742122e-05, |
| "loss": 0.0377, |
| "step": 8940 |
| }, |
| { |
| "epoch": 8.581016299137104, |
| "grad_norm": 0.622887134552002, |
| "learning_rate": 5.888135796530544e-05, |
| "loss": 0.0448, |
| "step": 8950 |
| }, |
| { |
| "epoch": 8.590604026845638, |
| "grad_norm": 0.28407829999923706, |
| "learning_rate": 5.879570370870995e-05, |
| "loss": 0.0373, |
| "step": 8960 |
| }, |
| { |
| "epoch": 8.60019175455417, |
| "grad_norm": 0.2791987955570221, |
| "learning_rate": 5.871002280710892e-05, |
| "loss": 0.0402, |
| "step": 8970 |
| }, |
| { |
| "epoch": 8.609779482262704, |
| "grad_norm": 0.27533990144729614, |
| "learning_rate": 5.862431552005729e-05, |
| "loss": 0.0434, |
| "step": 8980 |
| }, |
| { |
| "epoch": 8.619367209971237, |
| "grad_norm": 0.27701878547668457, |
| "learning_rate": 5.85385821071899e-05, |
| "loss": 0.0383, |
| "step": 8990 |
| }, |
| { |
| "epoch": 8.62895493767977, |
| "grad_norm": 0.269197016954422, |
| "learning_rate": 5.845282282822071e-05, |
| "loss": 0.0389, |
| "step": 9000 |
| }, |
| { |
| "epoch": 8.638542665388304, |
| "grad_norm": 0.3775997757911682, |
| "learning_rate": 5.836703794294208e-05, |
| "loss": 0.0401, |
| "step": 9010 |
| }, |
| { |
| "epoch": 8.648130393096835, |
| "grad_norm": 0.21519199013710022, |
| "learning_rate": 5.828122771122392e-05, |
| "loss": 0.0326, |
| "step": 9020 |
| }, |
| { |
| "epoch": 8.657718120805368, |
| "grad_norm": 0.4001868963241577, |
| "learning_rate": 5.819539239301291e-05, |
| "loss": 0.04, |
| "step": 9030 |
| }, |
| { |
| "epoch": 8.667305848513902, |
| "grad_norm": 0.19594238698482513, |
| "learning_rate": 5.810953224833177e-05, |
| "loss": 0.0301, |
| "step": 9040 |
| }, |
| { |
| "epoch": 8.676893576222435, |
| "grad_norm": 0.19823068380355835, |
| "learning_rate": 5.802364753727836e-05, |
| "loss": 0.0344, |
| "step": 9050 |
| }, |
| { |
| "epoch": 8.686481303930968, |
| "grad_norm": 0.26146700978279114, |
| "learning_rate": 5.793773852002502e-05, |
| "loss": 0.0444, |
| "step": 9060 |
| }, |
| { |
| "epoch": 8.696069031639501, |
| "grad_norm": 0.36863768100738525, |
| "learning_rate": 5.7851805456817677e-05, |
| "loss": 0.0364, |
| "step": 9070 |
| }, |
| { |
| "epoch": 8.705656759348035, |
| "grad_norm": 0.2518344521522522, |
| "learning_rate": 5.7765848607975136e-05, |
| "loss": 0.0394, |
| "step": 9080 |
| }, |
| { |
| "epoch": 8.715244487056568, |
| "grad_norm": 0.2473488301038742, |
| "learning_rate": 5.767986823388825e-05, |
| "loss": 0.0326, |
| "step": 9090 |
| }, |
| { |
| "epoch": 8.724832214765101, |
| "grad_norm": 0.20669348537921906, |
| "learning_rate": 5.7593864595019096e-05, |
| "loss": 0.0408, |
| "step": 9100 |
| }, |
| { |
| "epoch": 8.734419942473634, |
| "grad_norm": 0.32804393768310547, |
| "learning_rate": 5.750783795190029e-05, |
| "loss": 0.0388, |
| "step": 9110 |
| }, |
| { |
| "epoch": 8.744007670182167, |
| "grad_norm": 0.18472160398960114, |
| "learning_rate": 5.7421788565134074e-05, |
| "loss": 0.0395, |
| "step": 9120 |
| }, |
| { |
| "epoch": 8.7535953978907, |
| "grad_norm": 0.3553003668785095, |
| "learning_rate": 5.733571669539167e-05, |
| "loss": 0.0432, |
| "step": 9130 |
| }, |
| { |
| "epoch": 8.763183125599234, |
| "grad_norm": 0.2398902177810669, |
| "learning_rate": 5.72496226034123e-05, |
| "loss": 0.0354, |
| "step": 9140 |
| }, |
| { |
| "epoch": 8.772770853307765, |
| "grad_norm": 0.2900802195072174, |
| "learning_rate": 5.716350655000261e-05, |
| "loss": 0.0449, |
| "step": 9150 |
| }, |
| { |
| "epoch": 8.782358581016299, |
| "grad_norm": 0.17919373512268066, |
| "learning_rate": 5.707736879603568e-05, |
| "loss": 0.0413, |
| "step": 9160 |
| }, |
| { |
| "epoch": 8.791946308724832, |
| "grad_norm": 0.2598424255847931, |
| "learning_rate": 5.6991209602450424e-05, |
| "loss": 0.0432, |
| "step": 9170 |
| }, |
| { |
| "epoch": 8.801534036433365, |
| "grad_norm": 0.4794408082962036, |
| "learning_rate": 5.69050292302506e-05, |
| "loss": 0.0392, |
| "step": 9180 |
| }, |
| { |
| "epoch": 8.811121764141898, |
| "grad_norm": 0.3420094847679138, |
| "learning_rate": 5.6818827940504225e-05, |
| "loss": 0.0335, |
| "step": 9190 |
| }, |
| { |
| "epoch": 8.820709491850431, |
| "grad_norm": 1.9920908212661743, |
| "learning_rate": 5.673260599434259e-05, |
| "loss": 0.0427, |
| "step": 9200 |
| }, |
| { |
| "epoch": 8.830297219558965, |
| "grad_norm": 0.28250133991241455, |
| "learning_rate": 5.664636365295965e-05, |
| "loss": 0.0349, |
| "step": 9210 |
| }, |
| { |
| "epoch": 8.839884947267498, |
| "grad_norm": 0.22743001580238342, |
| "learning_rate": 5.656010117761105e-05, |
| "loss": 0.0401, |
| "step": 9220 |
| }, |
| { |
| "epoch": 8.849472674976031, |
| "grad_norm": 0.2771368622779846, |
| "learning_rate": 5.647381882961349e-05, |
| "loss": 0.0424, |
| "step": 9230 |
| }, |
| { |
| "epoch": 8.859060402684564, |
| "grad_norm": 0.38394448161125183, |
| "learning_rate": 5.638751687034387e-05, |
| "loss": 0.0357, |
| "step": 9240 |
| }, |
| { |
| "epoch": 8.868648130393098, |
| "grad_norm": 0.22416839003562927, |
| "learning_rate": 5.630119556123848e-05, |
| "loss": 0.0347, |
| "step": 9250 |
| }, |
| { |
| "epoch": 8.87823585810163, |
| "grad_norm": 0.1746525913476944, |
| "learning_rate": 5.6214855163792224e-05, |
| "loss": 0.0366, |
| "step": 9260 |
| }, |
| { |
| "epoch": 8.887823585810162, |
| "grad_norm": 0.26215359568595886, |
| "learning_rate": 5.6128495939557835e-05, |
| "loss": 0.0411, |
| "step": 9270 |
| }, |
| { |
| "epoch": 8.897411313518695, |
| "grad_norm": 0.3498288691043854, |
| "learning_rate": 5.604211815014509e-05, |
| "loss": 0.0404, |
| "step": 9280 |
| }, |
| { |
| "epoch": 8.906999041227229, |
| "grad_norm": 0.19935335218906403, |
| "learning_rate": 5.595572205721999e-05, |
| "loss": 0.0356, |
| "step": 9290 |
| }, |
| { |
| "epoch": 8.916586768935762, |
| "grad_norm": 0.3347182869911194, |
| "learning_rate": 5.5869307922504e-05, |
| "loss": 0.0393, |
| "step": 9300 |
| }, |
| { |
| "epoch": 8.926174496644295, |
| "grad_norm": 0.3638782203197479, |
| "learning_rate": 5.578287600777321e-05, |
| "loss": 0.0324, |
| "step": 9310 |
| }, |
| { |
| "epoch": 8.935762224352828, |
| "grad_norm": 0.2433633953332901, |
| "learning_rate": 5.569642657485761e-05, |
| "loss": 0.0351, |
| "step": 9320 |
| }, |
| { |
| "epoch": 8.945349952061362, |
| "grad_norm": 0.2311711609363556, |
| "learning_rate": 5.560995988564023e-05, |
| "loss": 0.0386, |
| "step": 9330 |
| }, |
| { |
| "epoch": 8.954937679769895, |
| "grad_norm": 0.2803432047367096, |
| "learning_rate": 5.552347620205638e-05, |
| "loss": 0.0461, |
| "step": 9340 |
| }, |
| { |
| "epoch": 8.964525407478428, |
| "grad_norm": 0.25586047768592834, |
| "learning_rate": 5.5436975786092873e-05, |
| "loss": 0.0384, |
| "step": 9350 |
| }, |
| { |
| "epoch": 8.974113135186961, |
| "grad_norm": 0.3626959025859833, |
| "learning_rate": 5.535045889978717e-05, |
| "loss": 0.0374, |
| "step": 9360 |
| }, |
| { |
| "epoch": 8.983700862895494, |
| "grad_norm": 0.3548148572444916, |
| "learning_rate": 5.526392580522666e-05, |
| "loss": 0.0416, |
| "step": 9370 |
| }, |
| { |
| "epoch": 8.993288590604028, |
| "grad_norm": 2.09843111038208, |
| "learning_rate": 5.5177376764547814e-05, |
| "loss": 0.0434, |
| "step": 9380 |
| }, |
| { |
| "epoch": 9.002876318312559, |
| "grad_norm": 0.4216479957103729, |
| "learning_rate": 5.5090812039935426e-05, |
| "loss": 0.0404, |
| "step": 9390 |
| }, |
| { |
| "epoch": 9.012464046021092, |
| "grad_norm": 0.292222261428833, |
| "learning_rate": 5.5004231893621774e-05, |
| "loss": 0.0362, |
| "step": 9400 |
| }, |
| { |
| "epoch": 9.022051773729626, |
| "grad_norm": 0.37306836247444153, |
| "learning_rate": 5.491763658788589e-05, |
| "loss": 0.0367, |
| "step": 9410 |
| }, |
| { |
| "epoch": 9.031639501438159, |
| "grad_norm": 0.2755350172519684, |
| "learning_rate": 5.483102638505269e-05, |
| "loss": 0.0401, |
| "step": 9420 |
| }, |
| { |
| "epoch": 9.041227229146692, |
| "grad_norm": 0.2616848349571228, |
| "learning_rate": 5.4744401547492254e-05, |
| "loss": 0.0337, |
| "step": 9430 |
| }, |
| { |
| "epoch": 9.050814956855225, |
| "grad_norm": 0.28111451864242554, |
| "learning_rate": 5.465776233761896e-05, |
| "loss": 0.0384, |
| "step": 9440 |
| }, |
| { |
| "epoch": 9.060402684563758, |
| "grad_norm": 0.23586216568946838, |
| "learning_rate": 5.4571109017890753e-05, |
| "loss": 0.0405, |
| "step": 9450 |
| }, |
| { |
| "epoch": 9.069990412272292, |
| "grad_norm": 0.3019304871559143, |
| "learning_rate": 5.44844418508083e-05, |
| "loss": 0.0389, |
| "step": 9460 |
| }, |
| { |
| "epoch": 9.079578139980825, |
| "grad_norm": 0.3531333804130554, |
| "learning_rate": 5.4397761098914254e-05, |
| "loss": 0.0334, |
| "step": 9470 |
| }, |
| { |
| "epoch": 9.089165867689358, |
| "grad_norm": 0.40830254554748535, |
| "learning_rate": 5.431106702479235e-05, |
| "loss": 0.0357, |
| "step": 9480 |
| }, |
| { |
| "epoch": 9.098753595397891, |
| "grad_norm": 0.44957104325294495, |
| "learning_rate": 5.4224359891066765e-05, |
| "loss": 0.039, |
| "step": 9490 |
| }, |
| { |
| "epoch": 9.108341323106425, |
| "grad_norm": 0.6519899964332581, |
| "learning_rate": 5.413763996040117e-05, |
| "loss": 0.0402, |
| "step": 9500 |
| }, |
| { |
| "epoch": 9.117929050814958, |
| "grad_norm": 0.4034676253795624, |
| "learning_rate": 5.405090749549804e-05, |
| "loss": 0.0459, |
| "step": 9510 |
| }, |
| { |
| "epoch": 9.12751677852349, |
| "grad_norm": 0.3996933698654175, |
| "learning_rate": 5.396416275909779e-05, |
| "loss": 0.0398, |
| "step": 9520 |
| }, |
| { |
| "epoch": 9.137104506232022, |
| "grad_norm": 0.16408595442771912, |
| "learning_rate": 5.387740601397806e-05, |
| "loss": 0.0358, |
| "step": 9530 |
| }, |
| { |
| "epoch": 9.146692233940556, |
| "grad_norm": 0.3471783995628357, |
| "learning_rate": 5.379063752295282e-05, |
| "loss": 0.0391, |
| "step": 9540 |
| }, |
| { |
| "epoch": 9.156279961649089, |
| "grad_norm": 0.4107268452644348, |
| "learning_rate": 5.370385754887164e-05, |
| "loss": 0.0424, |
| "step": 9550 |
| }, |
| { |
| "epoch": 9.165867689357622, |
| "grad_norm": 0.32927405834198, |
| "learning_rate": 5.3617066354618874e-05, |
| "loss": 0.0453, |
| "step": 9560 |
| }, |
| { |
| "epoch": 9.175455417066155, |
| "grad_norm": 0.41520607471466064, |
| "learning_rate": 5.3530264203112856e-05, |
| "loss": 0.0392, |
| "step": 9570 |
| }, |
| { |
| "epoch": 9.185043144774689, |
| "grad_norm": 0.3985765278339386, |
| "learning_rate": 5.344345135730513e-05, |
| "loss": 0.0364, |
| "step": 9580 |
| }, |
| { |
| "epoch": 9.194630872483222, |
| "grad_norm": 0.344056099653244, |
| "learning_rate": 5.335662808017964e-05, |
| "loss": 0.0444, |
| "step": 9590 |
| }, |
| { |
| "epoch": 9.204218600191755, |
| "grad_norm": 0.3382169008255005, |
| "learning_rate": 5.32697946347519e-05, |
| "loss": 0.0375, |
| "step": 9600 |
| }, |
| { |
| "epoch": 9.213806327900288, |
| "grad_norm": 0.3668196499347687, |
| "learning_rate": 5.318295128406825e-05, |
| "loss": 0.0427, |
| "step": 9610 |
| }, |
| { |
| "epoch": 9.223394055608821, |
| "grad_norm": 0.22777938842773438, |
| "learning_rate": 5.3096098291205044e-05, |
| "loss": 0.0362, |
| "step": 9620 |
| }, |
| { |
| "epoch": 9.232981783317355, |
| "grad_norm": 0.2992532551288605, |
| "learning_rate": 5.300923591926783e-05, |
| "loss": 0.0344, |
| "step": 9630 |
| }, |
| { |
| "epoch": 9.242569511025886, |
| "grad_norm": 0.2733289301395416, |
| "learning_rate": 5.292236443139056e-05, |
| "loss": 0.0318, |
| "step": 9640 |
| }, |
| { |
| "epoch": 9.25215723873442, |
| "grad_norm": 0.2972942292690277, |
| "learning_rate": 5.283548409073482e-05, |
| "loss": 0.0357, |
| "step": 9650 |
| }, |
| { |
| "epoch": 9.261744966442953, |
| "grad_norm": 0.3721420466899872, |
| "learning_rate": 5.274859516048901e-05, |
| "loss": 0.0356, |
| "step": 9660 |
| }, |
| { |
| "epoch": 9.271332694151486, |
| "grad_norm": 0.13791558146476746, |
| "learning_rate": 5.266169790386756e-05, |
| "loss": 0.0345, |
| "step": 9670 |
| }, |
| { |
| "epoch": 9.280920421860019, |
| "grad_norm": 0.2645628750324249, |
| "learning_rate": 5.257479258411008e-05, |
| "loss": 0.0426, |
| "step": 9680 |
| }, |
| { |
| "epoch": 9.290508149568552, |
| "grad_norm": 0.3136797845363617, |
| "learning_rate": 5.248787946448065e-05, |
| "loss": 0.0354, |
| "step": 9690 |
| }, |
| { |
| "epoch": 9.300095877277085, |
| "grad_norm": 0.25481873750686646, |
| "learning_rate": 5.240095880826695e-05, |
| "loss": 0.0401, |
| "step": 9700 |
| }, |
| { |
| "epoch": 9.309683604985619, |
| "grad_norm": 0.24243059754371643, |
| "learning_rate": 5.231403087877955e-05, |
| "loss": 0.0422, |
| "step": 9710 |
| }, |
| { |
| "epoch": 9.319271332694152, |
| "grad_norm": 0.22734355926513672, |
| "learning_rate": 5.2227095939350966e-05, |
| "loss": 0.0409, |
| "step": 9720 |
| }, |
| { |
| "epoch": 9.328859060402685, |
| "grad_norm": 0.35372641682624817, |
| "learning_rate": 5.214015425333502e-05, |
| "loss": 0.0413, |
| "step": 9730 |
| }, |
| { |
| "epoch": 9.338446788111218, |
| "grad_norm": 0.2218106985092163, |
| "learning_rate": 5.205320608410591e-05, |
| "loss": 0.0385, |
| "step": 9740 |
| }, |
| { |
| "epoch": 9.348034515819752, |
| "grad_norm": 0.8550918698310852, |
| "learning_rate": 5.196625169505755e-05, |
| "loss": 0.0383, |
| "step": 9750 |
| }, |
| { |
| "epoch": 9.357622243528283, |
| "grad_norm": 0.325469434261322, |
| "learning_rate": 5.18792913496026e-05, |
| "loss": 0.0377, |
| "step": 9760 |
| }, |
| { |
| "epoch": 9.367209971236816, |
| "grad_norm": 0.2887977063655853, |
| "learning_rate": 5.1792325311171875e-05, |
| "loss": 0.039, |
| "step": 9770 |
| }, |
| { |
| "epoch": 9.37679769894535, |
| "grad_norm": 0.267398476600647, |
| "learning_rate": 5.1705353843213336e-05, |
| "loss": 0.0351, |
| "step": 9780 |
| }, |
| { |
| "epoch": 9.386385426653883, |
| "grad_norm": 0.3469073176383972, |
| "learning_rate": 5.1618377209191447e-05, |
| "loss": 0.0373, |
| "step": 9790 |
| }, |
| { |
| "epoch": 9.395973154362416, |
| "grad_norm": 0.399781733751297, |
| "learning_rate": 5.1531395672586314e-05, |
| "loss": 0.0345, |
| "step": 9800 |
| }, |
| { |
| "epoch": 9.405560882070949, |
| "grad_norm": 0.3050326704978943, |
| "learning_rate": 5.144440949689287e-05, |
| "loss": 0.0436, |
| "step": 9810 |
| }, |
| { |
| "epoch": 9.415148609779482, |
| "grad_norm": 0.22124247252941132, |
| "learning_rate": 5.135741894562014e-05, |
| "loss": 0.0384, |
| "step": 9820 |
| }, |
| { |
| "epoch": 9.424736337488016, |
| "grad_norm": 0.32914167642593384, |
| "learning_rate": 5.127042428229036e-05, |
| "loss": 0.0395, |
| "step": 9830 |
| }, |
| { |
| "epoch": 9.434324065196549, |
| "grad_norm": 0.302157998085022, |
| "learning_rate": 5.118342577043829e-05, |
| "loss": 0.0446, |
| "step": 9840 |
| }, |
| { |
| "epoch": 9.443911792905082, |
| "grad_norm": 0.29756733775138855, |
| "learning_rate": 5.1096423673610246e-05, |
| "loss": 0.035, |
| "step": 9850 |
| }, |
| { |
| "epoch": 9.453499520613615, |
| "grad_norm": 0.21626603603363037, |
| "learning_rate": 5.100941825536353e-05, |
| "loss": 0.0487, |
| "step": 9860 |
| }, |
| { |
| "epoch": 9.463087248322148, |
| "grad_norm": 0.31502407789230347, |
| "learning_rate": 5.092240977926538e-05, |
| "loss": 0.0384, |
| "step": 9870 |
| }, |
| { |
| "epoch": 9.47267497603068, |
| "grad_norm": 0.3153168261051178, |
| "learning_rate": 5.083539850889239e-05, |
| "loss": 0.0377, |
| "step": 9880 |
| }, |
| { |
| "epoch": 9.482262703739213, |
| "grad_norm": 0.3235209584236145, |
| "learning_rate": 5.074838470782957e-05, |
| "loss": 0.0402, |
| "step": 9890 |
| }, |
| { |
| "epoch": 9.491850431447746, |
| "grad_norm": 0.4194275438785553, |
| "learning_rate": 5.066136863966963e-05, |
| "loss": 0.0349, |
| "step": 9900 |
| }, |
| { |
| "epoch": 9.50143815915628, |
| "grad_norm": 0.26690346002578735, |
| "learning_rate": 5.0574350568012086e-05, |
| "loss": 0.037, |
| "step": 9910 |
| }, |
| { |
| "epoch": 9.511025886864813, |
| "grad_norm": 0.3191596567630768, |
| "learning_rate": 5.0487330756462624e-05, |
| "loss": 0.0427, |
| "step": 9920 |
| }, |
| { |
| "epoch": 9.520613614573346, |
| "grad_norm": 0.21837887167930603, |
| "learning_rate": 5.040030946863209e-05, |
| "loss": 0.031, |
| "step": 9930 |
| }, |
| { |
| "epoch": 9.53020134228188, |
| "grad_norm": 0.28201964497566223, |
| "learning_rate": 5.0313286968135884e-05, |
| "loss": 0.0348, |
| "step": 9940 |
| }, |
| { |
| "epoch": 9.539789069990412, |
| "grad_norm": 0.6378640532493591, |
| "learning_rate": 5.022626351859305e-05, |
| "loss": 0.0392, |
| "step": 9950 |
| }, |
| { |
| "epoch": 9.549376797698946, |
| "grad_norm": 0.27877506613731384, |
| "learning_rate": 5.01392393836255e-05, |
| "loss": 0.0435, |
| "step": 9960 |
| }, |
| { |
| "epoch": 9.558964525407479, |
| "grad_norm": 0.21583925187587738, |
| "learning_rate": 5.0052214826857225e-05, |
| "loss": 0.036, |
| "step": 9970 |
| }, |
| { |
| "epoch": 9.568552253116012, |
| "grad_norm": 0.3575581908226013, |
| "learning_rate": 4.996519011191351e-05, |
| "loss": 0.0344, |
| "step": 9980 |
| }, |
| { |
| "epoch": 9.578139980824545, |
| "grad_norm": 0.2446652501821518, |
| "learning_rate": 4.9878165502420104e-05, |
| "loss": 0.0382, |
| "step": 9990 |
| }, |
| { |
| "epoch": 9.587727708533077, |
| "grad_norm": 0.1690993756055832, |
| "learning_rate": 4.979114126200244e-05, |
| "loss": 0.0392, |
| "step": 10000 |
| }, |
| { |
| "epoch": 9.59731543624161, |
| "grad_norm": 0.3892661929130554, |
| "learning_rate": 4.970411765428484e-05, |
| "loss": 0.0366, |
| "step": 10010 |
| }, |
| { |
| "epoch": 9.606903163950143, |
| "grad_norm": 0.26752811670303345, |
| "learning_rate": 4.961709494288966e-05, |
| "loss": 0.0377, |
| "step": 10020 |
| }, |
| { |
| "epoch": 9.616490891658676, |
| "grad_norm": 0.3104531466960907, |
| "learning_rate": 4.9530073391436654e-05, |
| "loss": 0.0371, |
| "step": 10030 |
| }, |
| { |
| "epoch": 9.62607861936721, |
| "grad_norm": 0.3081854283809662, |
| "learning_rate": 4.944305326354194e-05, |
| "loss": 0.0377, |
| "step": 10040 |
| }, |
| { |
| "epoch": 9.635666347075743, |
| "grad_norm": 0.32180699706077576, |
| "learning_rate": 4.935603482281739e-05, |
| "loss": 0.0364, |
| "step": 10050 |
| }, |
| { |
| "epoch": 9.645254074784276, |
| "grad_norm": 0.30046379566192627, |
| "learning_rate": 4.926901833286974e-05, |
| "loss": 0.0341, |
| "step": 10060 |
| }, |
| { |
| "epoch": 9.65484180249281, |
| "grad_norm": 0.24152809381484985, |
| "learning_rate": 4.918200405729986e-05, |
| "loss": 0.0453, |
| "step": 10070 |
| }, |
| { |
| "epoch": 9.664429530201343, |
| "grad_norm": 0.8806717395782471, |
| "learning_rate": 4.909499225970184e-05, |
| "loss": 0.0352, |
| "step": 10080 |
| }, |
| { |
| "epoch": 9.674017257909876, |
| "grad_norm": 0.3561595380306244, |
| "learning_rate": 4.9007983203662326e-05, |
| "loss": 0.0337, |
| "step": 10090 |
| }, |
| { |
| "epoch": 9.683604985618409, |
| "grad_norm": 0.3623135983943939, |
| "learning_rate": 4.892097715275961e-05, |
| "loss": 0.0361, |
| "step": 10100 |
| }, |
| { |
| "epoch": 9.693192713326942, |
| "grad_norm": 0.3282937705516815, |
| "learning_rate": 4.883397437056293e-05, |
| "loss": 0.0357, |
| "step": 10110 |
| }, |
| { |
| "epoch": 9.702780441035475, |
| "grad_norm": 0.28583481907844543, |
| "learning_rate": 4.87469751206316e-05, |
| "loss": 0.032, |
| "step": 10120 |
| }, |
| { |
| "epoch": 9.712368168744007, |
| "grad_norm": 0.20011906325817108, |
| "learning_rate": 4.865997966651421e-05, |
| "loss": 0.0335, |
| "step": 10130 |
| }, |
| { |
| "epoch": 9.72195589645254, |
| "grad_norm": 0.23072586953639984, |
| "learning_rate": 4.857298827174787e-05, |
| "loss": 0.0326, |
| "step": 10140 |
| }, |
| { |
| "epoch": 9.731543624161073, |
| "grad_norm": 0.21280129253864288, |
| "learning_rate": 4.8486001199857416e-05, |
| "loss": 0.0354, |
| "step": 10150 |
| }, |
| { |
| "epoch": 9.741131351869607, |
| "grad_norm": 0.4237668812274933, |
| "learning_rate": 4.839901871435457e-05, |
| "loss": 0.0351, |
| "step": 10160 |
| }, |
| { |
| "epoch": 9.75071907957814, |
| "grad_norm": 0.2798875868320465, |
| "learning_rate": 4.831204107873713e-05, |
| "loss": 0.0353, |
| "step": 10170 |
| }, |
| { |
| "epoch": 9.760306807286673, |
| "grad_norm": 0.20780718326568604, |
| "learning_rate": 4.822506855648825e-05, |
| "loss": 0.0326, |
| "step": 10180 |
| }, |
| { |
| "epoch": 9.769894534995206, |
| "grad_norm": 0.2649904489517212, |
| "learning_rate": 4.8138101411075574e-05, |
| "loss": 0.035, |
| "step": 10190 |
| }, |
| { |
| "epoch": 9.77948226270374, |
| "grad_norm": 0.26445141434669495, |
| "learning_rate": 4.805113990595046e-05, |
| "loss": 0.0468, |
| "step": 10200 |
| }, |
| { |
| "epoch": 9.789069990412273, |
| "grad_norm": 0.3209472894668579, |
| "learning_rate": 4.796418430454718e-05, |
| "loss": 0.0375, |
| "step": 10210 |
| }, |
| { |
| "epoch": 9.798657718120806, |
| "grad_norm": 0.19877949357032776, |
| "learning_rate": 4.787723487028209e-05, |
| "loss": 0.0381, |
| "step": 10220 |
| }, |
| { |
| "epoch": 9.808245445829339, |
| "grad_norm": 0.3071509301662445, |
| "learning_rate": 4.779029186655292e-05, |
| "loss": 0.0432, |
| "step": 10230 |
| }, |
| { |
| "epoch": 9.817833173537872, |
| "grad_norm": 0.4730135500431061, |
| "learning_rate": 4.77033555567379e-05, |
| "loss": 0.0374, |
| "step": 10240 |
| }, |
| { |
| "epoch": 9.827420901246404, |
| "grad_norm": 0.29888778924942017, |
| "learning_rate": 4.761642620419497e-05, |
| "loss": 0.0357, |
| "step": 10250 |
| }, |
| { |
| "epoch": 9.837008628954937, |
| "grad_norm": 0.2550467550754547, |
| "learning_rate": 4.7529504072260974e-05, |
| "loss": 0.0309, |
| "step": 10260 |
| }, |
| { |
| "epoch": 9.84659635666347, |
| "grad_norm": 0.25972646474838257, |
| "learning_rate": 4.744258942425094e-05, |
| "loss": 0.0421, |
| "step": 10270 |
| }, |
| { |
| "epoch": 9.856184084372003, |
| "grad_norm": 0.4071574807167053, |
| "learning_rate": 4.735568252345718e-05, |
| "loss": 0.0351, |
| "step": 10280 |
| }, |
| { |
| "epoch": 9.865771812080537, |
| "grad_norm": 0.4687805771827698, |
| "learning_rate": 4.726878363314855e-05, |
| "loss": 0.0369, |
| "step": 10290 |
| }, |
| { |
| "epoch": 9.87535953978907, |
| "grad_norm": 0.41865023970603943, |
| "learning_rate": 4.718189301656962e-05, |
| "loss": 0.0345, |
| "step": 10300 |
| }, |
| { |
| "epoch": 9.884947267497603, |
| "grad_norm": 0.30435627698898315, |
| "learning_rate": 4.709501093693997e-05, |
| "loss": 0.0321, |
| "step": 10310 |
| }, |
| { |
| "epoch": 9.894534995206136, |
| "grad_norm": 0.3561161458492279, |
| "learning_rate": 4.7008137657453214e-05, |
| "loss": 0.0409, |
| "step": 10320 |
| }, |
| { |
| "epoch": 9.90412272291467, |
| "grad_norm": 0.36440134048461914, |
| "learning_rate": 4.692127344127637e-05, |
| "loss": 0.033, |
| "step": 10330 |
| }, |
| { |
| "epoch": 9.913710450623203, |
| "grad_norm": 0.26994454860687256, |
| "learning_rate": 4.683441855154899e-05, |
| "loss": 0.0346, |
| "step": 10340 |
| }, |
| { |
| "epoch": 9.923298178331736, |
| "grad_norm": 0.2506847381591797, |
| "learning_rate": 4.674757325138239e-05, |
| "loss": 0.0314, |
| "step": 10350 |
| }, |
| { |
| "epoch": 9.93288590604027, |
| "grad_norm": 0.20864498615264893, |
| "learning_rate": 4.666073780385879e-05, |
| "loss": 0.0366, |
| "step": 10360 |
| }, |
| { |
| "epoch": 9.9424736337488, |
| "grad_norm": 0.18419000506401062, |
| "learning_rate": 4.65739124720306e-05, |
| "loss": 0.0329, |
| "step": 10370 |
| }, |
| { |
| "epoch": 9.952061361457334, |
| "grad_norm": 0.3387259244918823, |
| "learning_rate": 4.648709751891957e-05, |
| "loss": 0.0381, |
| "step": 10380 |
| }, |
| { |
| "epoch": 9.961649089165867, |
| "grad_norm": 0.2119244635105133, |
| "learning_rate": 4.640029320751606e-05, |
| "loss": 0.0351, |
| "step": 10390 |
| }, |
| { |
| "epoch": 9.9712368168744, |
| "grad_norm": 0.4716765880584717, |
| "learning_rate": 4.63134998007781e-05, |
| "loss": 0.0378, |
| "step": 10400 |
| }, |
| { |
| "epoch": 9.980824544582934, |
| "grad_norm": 0.47296905517578125, |
| "learning_rate": 4.622671756163075e-05, |
| "loss": 0.0397, |
| "step": 10410 |
| }, |
| { |
| "epoch": 9.990412272291467, |
| "grad_norm": 0.3720930218696594, |
| "learning_rate": 4.6139946752965216e-05, |
| "loss": 0.0387, |
| "step": 10420 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.2873878479003906, |
| "learning_rate": 4.6053187637638115e-05, |
| "loss": 0.0336, |
| "step": 10430 |
| }, |
| { |
| "epoch": 10.009587727708533, |
| "grad_norm": 0.27077776193618774, |
| "learning_rate": 4.596644047847061e-05, |
| "loss": 0.0335, |
| "step": 10440 |
| }, |
| { |
| "epoch": 10.019175455417066, |
| "grad_norm": 0.29882556200027466, |
| "learning_rate": 4.587970553824762e-05, |
| "loss": 0.0329, |
| "step": 10450 |
| }, |
| { |
| "epoch": 10.0287631831256, |
| "grad_norm": 0.23539794981479645, |
| "learning_rate": 4.579298307971709e-05, |
| "loss": 0.0319, |
| "step": 10460 |
| }, |
| { |
| "epoch": 10.038350910834133, |
| "grad_norm": 0.47081291675567627, |
| "learning_rate": 4.570627336558915e-05, |
| "loss": 0.0448, |
| "step": 10470 |
| }, |
| { |
| "epoch": 10.047938638542666, |
| "grad_norm": 0.21392913162708282, |
| "learning_rate": 4.561957665853532e-05, |
| "loss": 0.0406, |
| "step": 10480 |
| }, |
| { |
| "epoch": 10.0575263662512, |
| "grad_norm": 0.31942254304885864, |
| "learning_rate": 4.553289322118769e-05, |
| "loss": 0.0347, |
| "step": 10490 |
| }, |
| { |
| "epoch": 10.06711409395973, |
| "grad_norm": 0.22749362885951996, |
| "learning_rate": 4.544622331613817e-05, |
| "loss": 0.0414, |
| "step": 10500 |
| }, |
| { |
| "epoch": 10.076701821668264, |
| "grad_norm": 0.24884119629859924, |
| "learning_rate": 4.5359567205937706e-05, |
| "loss": 0.0314, |
| "step": 10510 |
| }, |
| { |
| "epoch": 10.086289549376797, |
| "grad_norm": 0.26897284388542175, |
| "learning_rate": 4.527292515309541e-05, |
| "loss": 0.0394, |
| "step": 10520 |
| }, |
| { |
| "epoch": 10.09587727708533, |
| "grad_norm": 0.3579690158367157, |
| "learning_rate": 4.518629742007786e-05, |
| "loss": 0.0365, |
| "step": 10530 |
| }, |
| { |
| "epoch": 10.105465004793864, |
| "grad_norm": 0.19811834394931793, |
| "learning_rate": 4.509968426930817e-05, |
| "loss": 0.0358, |
| "step": 10540 |
| }, |
| { |
| "epoch": 10.115052732502397, |
| "grad_norm": 0.2834417223930359, |
| "learning_rate": 4.501308596316537e-05, |
| "loss": 0.0329, |
| "step": 10550 |
| }, |
| { |
| "epoch": 10.12464046021093, |
| "grad_norm": 0.1813543736934662, |
| "learning_rate": 4.492650276398347e-05, |
| "loss": 0.0345, |
| "step": 10560 |
| }, |
| { |
| "epoch": 10.134228187919463, |
| "grad_norm": 0.23895332217216492, |
| "learning_rate": 4.483993493405075e-05, |
| "loss": 0.0328, |
| "step": 10570 |
| }, |
| { |
| "epoch": 10.143815915627997, |
| "grad_norm": 0.2329237461090088, |
| "learning_rate": 4.475338273560886e-05, |
| "loss": 0.0334, |
| "step": 10580 |
| }, |
| { |
| "epoch": 10.15340364333653, |
| "grad_norm": 0.32786402106285095, |
| "learning_rate": 4.466684643085223e-05, |
| "loss": 0.0362, |
| "step": 10590 |
| }, |
| { |
| "epoch": 10.162991371045063, |
| "grad_norm": 0.2858993709087372, |
| "learning_rate": 4.458032628192699e-05, |
| "loss": 0.0349, |
| "step": 10600 |
| }, |
| { |
| "epoch": 10.172579098753596, |
| "grad_norm": 0.38395509123802185, |
| "learning_rate": 4.449382255093044e-05, |
| "loss": 0.0384, |
| "step": 10610 |
| }, |
| { |
| "epoch": 10.182166826462128, |
| "grad_norm": 0.35513293743133545, |
| "learning_rate": 4.440733549991006e-05, |
| "loss": 0.0317, |
| "step": 10620 |
| }, |
| { |
| "epoch": 10.191754554170661, |
| "grad_norm": 0.21551890671253204, |
| "learning_rate": 4.432086539086292e-05, |
| "loss": 0.0373, |
| "step": 10630 |
| }, |
| { |
| "epoch": 10.201342281879194, |
| "grad_norm": 0.22998203337192535, |
| "learning_rate": 4.423441248573463e-05, |
| "loss": 0.0376, |
| "step": 10640 |
| }, |
| { |
| "epoch": 10.210930009587727, |
| "grad_norm": 0.4294188618659973, |
| "learning_rate": 4.4147977046418776e-05, |
| "loss": 0.0356, |
| "step": 10650 |
| }, |
| { |
| "epoch": 10.22051773729626, |
| "grad_norm": 0.2688153386116028, |
| "learning_rate": 4.406155933475599e-05, |
| "loss": 0.0364, |
| "step": 10660 |
| }, |
| { |
| "epoch": 10.230105465004794, |
| "grad_norm": 0.39193832874298096, |
| "learning_rate": 4.3975159612533244e-05, |
| "loss": 0.0337, |
| "step": 10670 |
| }, |
| { |
| "epoch": 10.239693192713327, |
| "grad_norm": 0.4422641694545746, |
| "learning_rate": 4.388877814148296e-05, |
| "loss": 0.0328, |
| "step": 10680 |
| }, |
| { |
| "epoch": 10.24928092042186, |
| "grad_norm": 0.25854796171188354, |
| "learning_rate": 4.380241518328231e-05, |
| "loss": 0.0338, |
| "step": 10690 |
| }, |
| { |
| "epoch": 10.258868648130393, |
| "grad_norm": 0.282626748085022, |
| "learning_rate": 4.371607099955236e-05, |
| "loss": 0.0398, |
| "step": 10700 |
| }, |
| { |
| "epoch": 10.268456375838927, |
| "grad_norm": 0.2568127512931824, |
| "learning_rate": 4.362974585185734e-05, |
| "loss": 0.0354, |
| "step": 10710 |
| }, |
| { |
| "epoch": 10.27804410354746, |
| "grad_norm": 0.28798142075538635, |
| "learning_rate": 4.3543440001703786e-05, |
| "loss": 0.0354, |
| "step": 10720 |
| }, |
| { |
| "epoch": 10.287631831255993, |
| "grad_norm": 0.28471261262893677, |
| "learning_rate": 4.345715371053976e-05, |
| "loss": 0.0365, |
| "step": 10730 |
| }, |
| { |
| "epoch": 10.297219558964525, |
| "grad_norm": 0.27555039525032043, |
| "learning_rate": 4.3370887239754085e-05, |
| "loss": 0.0324, |
| "step": 10740 |
| }, |
| { |
| "epoch": 10.306807286673058, |
| "grad_norm": 0.34258362650871277, |
| "learning_rate": 4.328464085067559e-05, |
| "loss": 0.0313, |
| "step": 10750 |
| }, |
| { |
| "epoch": 10.316395014381591, |
| "grad_norm": 0.2875727117061615, |
| "learning_rate": 4.319841480457221e-05, |
| "loss": 0.034, |
| "step": 10760 |
| }, |
| { |
| "epoch": 10.325982742090124, |
| "grad_norm": 0.37291842699050903, |
| "learning_rate": 4.311220936265025e-05, |
| "loss": 0.0358, |
| "step": 10770 |
| }, |
| { |
| "epoch": 10.335570469798657, |
| "grad_norm": 0.28330934047698975, |
| "learning_rate": 4.302602478605364e-05, |
| "loss": 0.0371, |
| "step": 10780 |
| }, |
| { |
| "epoch": 10.34515819750719, |
| "grad_norm": 0.2582619786262512, |
| "learning_rate": 4.29398613358631e-05, |
| "loss": 0.0373, |
| "step": 10790 |
| }, |
| { |
| "epoch": 10.354745925215724, |
| "grad_norm": 0.4369192123413086, |
| "learning_rate": 4.2853719273095306e-05, |
| "loss": 0.035, |
| "step": 10800 |
| }, |
| { |
| "epoch": 10.364333652924257, |
| "grad_norm": 0.7189898490905762, |
| "learning_rate": 4.276759885870221e-05, |
| "loss": 0.0306, |
| "step": 10810 |
| }, |
| { |
| "epoch": 10.37392138063279, |
| "grad_norm": 0.25174766778945923, |
| "learning_rate": 4.26815003535701e-05, |
| "loss": 0.0409, |
| "step": 10820 |
| }, |
| { |
| "epoch": 10.383509108341324, |
| "grad_norm": 0.251800537109375, |
| "learning_rate": 4.2595424018518994e-05, |
| "loss": 0.0338, |
| "step": 10830 |
| }, |
| { |
| "epoch": 10.393096836049857, |
| "grad_norm": 0.2858979105949402, |
| "learning_rate": 4.250937011430167e-05, |
| "loss": 0.041, |
| "step": 10840 |
| }, |
| { |
| "epoch": 10.40268456375839, |
| "grad_norm": 0.1836014688014984, |
| "learning_rate": 4.2423338901602985e-05, |
| "loss": 0.0356, |
| "step": 10850 |
| }, |
| { |
| "epoch": 10.412272291466923, |
| "grad_norm": 0.279307097196579, |
| "learning_rate": 4.233733064103906e-05, |
| "loss": 0.0359, |
| "step": 10860 |
| }, |
| { |
| "epoch": 10.421860019175455, |
| "grad_norm": 0.32045918703079224, |
| "learning_rate": 4.225134559315647e-05, |
| "loss": 0.0377, |
| "step": 10870 |
| }, |
| { |
| "epoch": 10.431447746883988, |
| "grad_norm": 0.2521663010120392, |
| "learning_rate": 4.2165384018431495e-05, |
| "loss": 0.0301, |
| "step": 10880 |
| }, |
| { |
| "epoch": 10.441035474592521, |
| "grad_norm": 0.7854000329971313, |
| "learning_rate": 4.207944617726931e-05, |
| "loss": 0.0337, |
| "step": 10890 |
| }, |
| { |
| "epoch": 10.450623202301054, |
| "grad_norm": 0.2677070200443268, |
| "learning_rate": 4.1993532330003146e-05, |
| "loss": 0.0392, |
| "step": 10900 |
| }, |
| { |
| "epoch": 10.460210930009588, |
| "grad_norm": 0.4461430609226227, |
| "learning_rate": 4.190764273689359e-05, |
| "loss": 0.0306, |
| "step": 10910 |
| }, |
| { |
| "epoch": 10.46979865771812, |
| "grad_norm": 0.30843472480773926, |
| "learning_rate": 4.1821777658127765e-05, |
| "loss": 0.0259, |
| "step": 10920 |
| }, |
| { |
| "epoch": 10.479386385426654, |
| "grad_norm": 0.5075517296791077, |
| "learning_rate": 4.17359373538185e-05, |
| "loss": 0.0376, |
| "step": 10930 |
| }, |
| { |
| "epoch": 10.488974113135187, |
| "grad_norm": 0.3522166609764099, |
| "learning_rate": 4.16501220840036e-05, |
| "loss": 0.0272, |
| "step": 10940 |
| }, |
| { |
| "epoch": 10.49856184084372, |
| "grad_norm": 0.3115832805633545, |
| "learning_rate": 4.156433210864499e-05, |
| "loss": 0.0421, |
| "step": 10950 |
| }, |
| { |
| "epoch": 10.508149568552254, |
| "grad_norm": 0.29928937554359436, |
| "learning_rate": 4.147856768762804e-05, |
| "loss": 0.0329, |
| "step": 10960 |
| }, |
| { |
| "epoch": 10.517737296260787, |
| "grad_norm": 0.2621513903141022, |
| "learning_rate": 4.139282908076064e-05, |
| "loss": 0.0313, |
| "step": 10970 |
| }, |
| { |
| "epoch": 10.527325023969318, |
| "grad_norm": 0.31416305899620056, |
| "learning_rate": 4.130711654777254e-05, |
| "loss": 0.0311, |
| "step": 10980 |
| }, |
| { |
| "epoch": 10.536912751677852, |
| "grad_norm": 0.23825299739837646, |
| "learning_rate": 4.1221430348314415e-05, |
| "loss": 0.0386, |
| "step": 10990 |
| }, |
| { |
| "epoch": 10.546500479386385, |
| "grad_norm": 0.2471434473991394, |
| "learning_rate": 4.11357707419573e-05, |
| "loss": 0.038, |
| "step": 11000 |
| }, |
| { |
| "epoch": 10.556088207094918, |
| "grad_norm": 0.2707345187664032, |
| "learning_rate": 4.105013798819155e-05, |
| "loss": 0.0356, |
| "step": 11010 |
| }, |
| { |
| "epoch": 10.565675934803451, |
| "grad_norm": 0.3994966149330139, |
| "learning_rate": 4.0964532346426235e-05, |
| "loss": 0.0326, |
| "step": 11020 |
| }, |
| { |
| "epoch": 10.575263662511984, |
| "grad_norm": 0.5146787762641907, |
| "learning_rate": 4.087895407598824e-05, |
| "loss": 0.0361, |
| "step": 11030 |
| }, |
| { |
| "epoch": 10.584851390220518, |
| "grad_norm": 0.2920519709587097, |
| "learning_rate": 4.079340343612165e-05, |
| "loss": 0.0326, |
| "step": 11040 |
| }, |
| { |
| "epoch": 10.594439117929051, |
| "grad_norm": 0.27901026606559753, |
| "learning_rate": 4.070788068598672e-05, |
| "loss": 0.037, |
| "step": 11050 |
| }, |
| { |
| "epoch": 10.604026845637584, |
| "grad_norm": 0.26402774453163147, |
| "learning_rate": 4.062238608465927e-05, |
| "loss": 0.0337, |
| "step": 11060 |
| }, |
| { |
| "epoch": 10.613614573346117, |
| "grad_norm": 0.24872805178165436, |
| "learning_rate": 4.053691989112986e-05, |
| "loss": 0.0343, |
| "step": 11070 |
| }, |
| { |
| "epoch": 10.62320230105465, |
| "grad_norm": 0.21889743208885193, |
| "learning_rate": 4.0451482364303e-05, |
| "loss": 0.0329, |
| "step": 11080 |
| }, |
| { |
| "epoch": 10.632790028763184, |
| "grad_norm": 0.31977149844169617, |
| "learning_rate": 4.03660737629963e-05, |
| "loss": 0.0395, |
| "step": 11090 |
| }, |
| { |
| "epoch": 10.642377756471717, |
| "grad_norm": 0.3449043929576874, |
| "learning_rate": 4.028069434593982e-05, |
| "loss": 0.0362, |
| "step": 11100 |
| }, |
| { |
| "epoch": 10.651965484180248, |
| "grad_norm": 0.356534481048584, |
| "learning_rate": 4.019534437177516e-05, |
| "loss": 0.0453, |
| "step": 11110 |
| }, |
| { |
| "epoch": 10.661553211888782, |
| "grad_norm": 0.3510785400867462, |
| "learning_rate": 4.0110024099054756e-05, |
| "loss": 0.03, |
| "step": 11120 |
| }, |
| { |
| "epoch": 10.671140939597315, |
| "grad_norm": 0.4049818813800812, |
| "learning_rate": 4.002473378624107e-05, |
| "loss": 0.0337, |
| "step": 11130 |
| }, |
| { |
| "epoch": 10.680728667305848, |
| "grad_norm": 0.2889692485332489, |
| "learning_rate": 3.9939473691705765e-05, |
| "loss": 0.0369, |
| "step": 11140 |
| }, |
| { |
| "epoch": 10.690316395014381, |
| "grad_norm": 0.25454413890838623, |
| "learning_rate": 3.9854244073728996e-05, |
| "loss": 0.0373, |
| "step": 11150 |
| }, |
| { |
| "epoch": 10.699904122722915, |
| "grad_norm": 0.28601503372192383, |
| "learning_rate": 3.976904519049862e-05, |
| "loss": 0.0384, |
| "step": 11160 |
| }, |
| { |
| "epoch": 10.709491850431448, |
| "grad_norm": 0.22738857567310333, |
| "learning_rate": 3.968387730010935e-05, |
| "loss": 0.0352, |
| "step": 11170 |
| }, |
| { |
| "epoch": 10.719079578139981, |
| "grad_norm": 0.2723415493965149, |
| "learning_rate": 3.9598740660562005e-05, |
| "loss": 0.0372, |
| "step": 11180 |
| }, |
| { |
| "epoch": 10.728667305848514, |
| "grad_norm": 0.35877975821495056, |
| "learning_rate": 3.951363552976275e-05, |
| "loss": 0.0321, |
| "step": 11190 |
| }, |
| { |
| "epoch": 10.738255033557047, |
| "grad_norm": 0.2732999324798584, |
| "learning_rate": 3.942856216552234e-05, |
| "loss": 0.0423, |
| "step": 11200 |
| }, |
| { |
| "epoch": 10.74784276126558, |
| "grad_norm": 0.1939064860343933, |
| "learning_rate": 3.934352082555522e-05, |
| "loss": 0.0383, |
| "step": 11210 |
| }, |
| { |
| "epoch": 10.757430488974114, |
| "grad_norm": 0.34008413553237915, |
| "learning_rate": 3.92585117674789e-05, |
| "loss": 0.0374, |
| "step": 11220 |
| }, |
| { |
| "epoch": 10.767018216682647, |
| "grad_norm": 0.32701992988586426, |
| "learning_rate": 3.917353524881302e-05, |
| "loss": 0.0336, |
| "step": 11230 |
| }, |
| { |
| "epoch": 10.776605944391179, |
| "grad_norm": 0.29676583409309387, |
| "learning_rate": 3.908859152697872e-05, |
| "loss": 0.0358, |
| "step": 11240 |
| }, |
| { |
| "epoch": 10.786193672099712, |
| "grad_norm": 0.21634122729301453, |
| "learning_rate": 3.900368085929775e-05, |
| "loss": 0.0357, |
| "step": 11250 |
| }, |
| { |
| "epoch": 10.795781399808245, |
| "grad_norm": 0.29007887840270996, |
| "learning_rate": 3.8918803502991744e-05, |
| "loss": 0.0396, |
| "step": 11260 |
| }, |
| { |
| "epoch": 10.805369127516778, |
| "grad_norm": 0.2906304895877838, |
| "learning_rate": 3.883395971518138e-05, |
| "loss": 0.0293, |
| "step": 11270 |
| }, |
| { |
| "epoch": 10.814956855225311, |
| "grad_norm": 0.19408248364925385, |
| "learning_rate": 3.874914975288575e-05, |
| "loss": 0.0338, |
| "step": 11280 |
| }, |
| { |
| "epoch": 10.824544582933845, |
| "grad_norm": 0.9713996052742004, |
| "learning_rate": 3.8664373873021356e-05, |
| "loss": 0.0367, |
| "step": 11290 |
| }, |
| { |
| "epoch": 10.834132310642378, |
| "grad_norm": 0.43305110931396484, |
| "learning_rate": 3.857963233240153e-05, |
| "loss": 0.0409, |
| "step": 11300 |
| }, |
| { |
| "epoch": 10.843720038350911, |
| "grad_norm": 0.4623974859714508, |
| "learning_rate": 3.849492538773552e-05, |
| "loss": 0.0322, |
| "step": 11310 |
| }, |
| { |
| "epoch": 10.853307766059444, |
| "grad_norm": 0.13911698758602142, |
| "learning_rate": 3.841025329562789e-05, |
| "loss": 0.0371, |
| "step": 11320 |
| }, |
| { |
| "epoch": 10.862895493767978, |
| "grad_norm": 0.40783533453941345, |
| "learning_rate": 3.832561631257748e-05, |
| "loss": 0.0334, |
| "step": 11330 |
| }, |
| { |
| "epoch": 10.87248322147651, |
| "grad_norm": 0.2820438742637634, |
| "learning_rate": 3.824101469497685e-05, |
| "loss": 0.0357, |
| "step": 11340 |
| }, |
| { |
| "epoch": 10.882070949185042, |
| "grad_norm": 0.2518521547317505, |
| "learning_rate": 3.8156448699111414e-05, |
| "loss": 0.0398, |
| "step": 11350 |
| }, |
| { |
| "epoch": 10.891658676893575, |
| "grad_norm": 0.22868366539478302, |
| "learning_rate": 3.80719185811587e-05, |
| "loss": 0.0329, |
| "step": 11360 |
| }, |
| { |
| "epoch": 10.901246404602109, |
| "grad_norm": 0.28649628162384033, |
| "learning_rate": 3.79874245971875e-05, |
| "loss": 0.0362, |
| "step": 11370 |
| }, |
| { |
| "epoch": 10.910834132310642, |
| "grad_norm": 0.2933325171470642, |
| "learning_rate": 3.790296700315717e-05, |
| "loss": 0.0322, |
| "step": 11380 |
| }, |
| { |
| "epoch": 10.920421860019175, |
| "grad_norm": 0.34184950590133667, |
| "learning_rate": 3.781854605491684e-05, |
| "loss": 0.034, |
| "step": 11390 |
| }, |
| { |
| "epoch": 10.930009587727708, |
| "grad_norm": 0.26722094416618347, |
| "learning_rate": 3.773416200820463e-05, |
| "loss": 0.0369, |
| "step": 11400 |
| }, |
| { |
| "epoch": 10.939597315436242, |
| "grad_norm": 0.22674645483493805, |
| "learning_rate": 3.764981511864686e-05, |
| "loss": 0.0349, |
| "step": 11410 |
| }, |
| { |
| "epoch": 10.949185043144775, |
| "grad_norm": 0.6623883843421936, |
| "learning_rate": 3.756550564175727e-05, |
| "loss": 0.0331, |
| "step": 11420 |
| }, |
| { |
| "epoch": 10.958772770853308, |
| "grad_norm": 0.3025140166282654, |
| "learning_rate": 3.748123383293629e-05, |
| "loss": 0.0364, |
| "step": 11430 |
| }, |
| { |
| "epoch": 10.968360498561841, |
| "grad_norm": 0.2423921674489975, |
| "learning_rate": 3.739699994747026e-05, |
| "loss": 0.0305, |
| "step": 11440 |
| }, |
| { |
| "epoch": 10.977948226270374, |
| "grad_norm": 0.2216835469007492, |
| "learning_rate": 3.731280424053061e-05, |
| "loss": 0.0338, |
| "step": 11450 |
| }, |
| { |
| "epoch": 10.987535953978908, |
| "grad_norm": 0.4063700735569, |
| "learning_rate": 3.7228646967173096e-05, |
| "loss": 0.0437, |
| "step": 11460 |
| }, |
| { |
| "epoch": 10.997123681687441, |
| "grad_norm": 0.21180011332035065, |
| "learning_rate": 3.7144528382337086e-05, |
| "loss": 0.0362, |
| "step": 11470 |
| }, |
| { |
| "epoch": 11.006711409395972, |
| "grad_norm": 0.22706526517868042, |
| "learning_rate": 3.706044874084474e-05, |
| "loss": 0.0343, |
| "step": 11480 |
| }, |
| { |
| "epoch": 11.016299137104506, |
| "grad_norm": 0.3348940908908844, |
| "learning_rate": 3.6976408297400257e-05, |
| "loss": 0.0344, |
| "step": 11490 |
| }, |
| { |
| "epoch": 11.025886864813039, |
| "grad_norm": 0.21291491389274597, |
| "learning_rate": 3.6892407306589035e-05, |
| "loss": 0.0329, |
| "step": 11500 |
| }, |
| { |
| "epoch": 11.035474592521572, |
| "grad_norm": 0.3505829870700836, |
| "learning_rate": 3.6808446022877e-05, |
| "loss": 0.0339, |
| "step": 11510 |
| }, |
| { |
| "epoch": 11.045062320230105, |
| "grad_norm": 0.36319780349731445, |
| "learning_rate": 3.672452470060982e-05, |
| "loss": 0.0338, |
| "step": 11520 |
| }, |
| { |
| "epoch": 11.054650047938638, |
| "grad_norm": 0.3714457154273987, |
| "learning_rate": 3.6640643594012057e-05, |
| "loss": 0.0419, |
| "step": 11530 |
| }, |
| { |
| "epoch": 11.064237775647172, |
| "grad_norm": 0.27974534034729004, |
| "learning_rate": 3.6556802957186486e-05, |
| "loss": 0.0359, |
| "step": 11540 |
| }, |
| { |
| "epoch": 11.073825503355705, |
| "grad_norm": 0.34719452261924744, |
| "learning_rate": 3.647300304411323e-05, |
| "loss": 0.0367, |
| "step": 11550 |
| }, |
| { |
| "epoch": 11.083413231064238, |
| "grad_norm": 0.24294276535511017, |
| "learning_rate": 3.6389244108649114e-05, |
| "loss": 0.0316, |
| "step": 11560 |
| }, |
| { |
| "epoch": 11.093000958772771, |
| "grad_norm": 0.3280002474784851, |
| "learning_rate": 3.6305526404526785e-05, |
| "loss": 0.0315, |
| "step": 11570 |
| }, |
| { |
| "epoch": 11.102588686481305, |
| "grad_norm": 0.25797387957572937, |
| "learning_rate": 3.6221850185354014e-05, |
| "loss": 0.0306, |
| "step": 11580 |
| }, |
| { |
| "epoch": 11.112176414189838, |
| "grad_norm": 0.2705564498901367, |
| "learning_rate": 3.613821570461284e-05, |
| "loss": 0.0333, |
| "step": 11590 |
| }, |
| { |
| "epoch": 11.12176414189837, |
| "grad_norm": 0.2857078015804291, |
| "learning_rate": 3.605462321565899e-05, |
| "loss": 0.0329, |
| "step": 11600 |
| }, |
| { |
| "epoch": 11.131351869606902, |
| "grad_norm": 0.23920407891273499, |
| "learning_rate": 3.597107297172084e-05, |
| "loss": 0.0366, |
| "step": 11610 |
| }, |
| { |
| "epoch": 11.140939597315436, |
| "grad_norm": 0.31336209177970886, |
| "learning_rate": 3.588756522589888e-05, |
| "loss": 0.03, |
| "step": 11620 |
| }, |
| { |
| "epoch": 11.150527325023969, |
| "grad_norm": 0.2026471495628357, |
| "learning_rate": 3.5804100231164824e-05, |
| "loss": 0.0328, |
| "step": 11630 |
| }, |
| { |
| "epoch": 11.160115052732502, |
| "grad_norm": 0.166408970952034, |
| "learning_rate": 3.572067824036092e-05, |
| "loss": 0.0357, |
| "step": 11640 |
| }, |
| { |
| "epoch": 11.169702780441035, |
| "grad_norm": 0.24978677928447723, |
| "learning_rate": 3.5637299506199075e-05, |
| "loss": 0.0289, |
| "step": 11650 |
| }, |
| { |
| "epoch": 11.179290508149569, |
| "grad_norm": 0.36853691935539246, |
| "learning_rate": 3.5553964281260225e-05, |
| "loss": 0.036, |
| "step": 11660 |
| }, |
| { |
| "epoch": 11.188878235858102, |
| "grad_norm": 0.31218189001083374, |
| "learning_rate": 3.547067281799345e-05, |
| "loss": 0.0327, |
| "step": 11670 |
| }, |
| { |
| "epoch": 11.198465963566635, |
| "grad_norm": 0.2616768777370453, |
| "learning_rate": 3.538742536871531e-05, |
| "loss": 0.0378, |
| "step": 11680 |
| }, |
| { |
| "epoch": 11.208053691275168, |
| "grad_norm": 0.3586946725845337, |
| "learning_rate": 3.530422218560903e-05, |
| "loss": 0.0378, |
| "step": 11690 |
| }, |
| { |
| "epoch": 11.217641418983701, |
| "grad_norm": 0.1958369016647339, |
| "learning_rate": 3.522106352072366e-05, |
| "loss": 0.0368, |
| "step": 11700 |
| }, |
| { |
| "epoch": 11.227229146692235, |
| "grad_norm": 0.30349719524383545, |
| "learning_rate": 3.5137949625973484e-05, |
| "loss": 0.0396, |
| "step": 11710 |
| }, |
| { |
| "epoch": 11.236816874400766, |
| "grad_norm": 0.22439143061637878, |
| "learning_rate": 3.505488075313712e-05, |
| "loss": 0.0275, |
| "step": 11720 |
| }, |
| { |
| "epoch": 11.2464046021093, |
| "grad_norm": 0.3639642596244812, |
| "learning_rate": 3.4971857153856825e-05, |
| "loss": 0.03, |
| "step": 11730 |
| }, |
| { |
| "epoch": 11.255992329817833, |
| "grad_norm": 0.19874945282936096, |
| "learning_rate": 3.488887907963766e-05, |
| "loss": 0.0341, |
| "step": 11740 |
| }, |
| { |
| "epoch": 11.265580057526366, |
| "grad_norm": 0.6180244088172913, |
| "learning_rate": 3.480594678184681e-05, |
| "loss": 0.0346, |
| "step": 11750 |
| }, |
| { |
| "epoch": 11.275167785234899, |
| "grad_norm": 0.27457571029663086, |
| "learning_rate": 3.472306051171281e-05, |
| "loss": 0.0359, |
| "step": 11760 |
| }, |
| { |
| "epoch": 11.284755512943432, |
| "grad_norm": 0.18931525945663452, |
| "learning_rate": 3.464022052032473e-05, |
| "loss": 0.0311, |
| "step": 11770 |
| }, |
| { |
| "epoch": 11.294343240651965, |
| "grad_norm": 0.2550256848335266, |
| "learning_rate": 3.455742705863143e-05, |
| "loss": 0.0346, |
| "step": 11780 |
| }, |
| { |
| "epoch": 11.303930968360499, |
| "grad_norm": 0.21088473498821259, |
| "learning_rate": 3.447468037744084e-05, |
| "loss": 0.0295, |
| "step": 11790 |
| }, |
| { |
| "epoch": 11.313518696069032, |
| "grad_norm": 0.25027552247047424, |
| "learning_rate": 3.439198072741921e-05, |
| "loss": 0.0375, |
| "step": 11800 |
| }, |
| { |
| "epoch": 11.323106423777565, |
| "grad_norm": 0.5064207315444946, |
| "learning_rate": 3.4309328359090264e-05, |
| "loss": 0.0332, |
| "step": 11810 |
| }, |
| { |
| "epoch": 11.332694151486098, |
| "grad_norm": 0.2110755443572998, |
| "learning_rate": 3.422672352283453e-05, |
| "loss": 0.0351, |
| "step": 11820 |
| }, |
| { |
| "epoch": 11.342281879194632, |
| "grad_norm": 0.27771392464637756, |
| "learning_rate": 3.41441664688885e-05, |
| "loss": 0.0383, |
| "step": 11830 |
| }, |
| { |
| "epoch": 11.351869606903165, |
| "grad_norm": 0.34242868423461914, |
| "learning_rate": 3.406165744734397e-05, |
| "loss": 0.0298, |
| "step": 11840 |
| }, |
| { |
| "epoch": 11.361457334611696, |
| "grad_norm": 0.3390040099620819, |
| "learning_rate": 3.397919670814723e-05, |
| "loss": 0.0377, |
| "step": 11850 |
| }, |
| { |
| "epoch": 11.37104506232023, |
| "grad_norm": 0.15492115914821625, |
| "learning_rate": 3.389678450109827e-05, |
| "loss": 0.0403, |
| "step": 11860 |
| }, |
| { |
| "epoch": 11.380632790028763, |
| "grad_norm": 0.3101263642311096, |
| "learning_rate": 3.3814421075850035e-05, |
| "loss": 0.0362, |
| "step": 11870 |
| }, |
| { |
| "epoch": 11.390220517737296, |
| "grad_norm": 0.2800522446632385, |
| "learning_rate": 3.3732106681907816e-05, |
| "loss": 0.032, |
| "step": 11880 |
| }, |
| { |
| "epoch": 11.39980824544583, |
| "grad_norm": 0.26244333386421204, |
| "learning_rate": 3.364984156862825e-05, |
| "loss": 0.0307, |
| "step": 11890 |
| }, |
| { |
| "epoch": 11.409395973154362, |
| "grad_norm": 0.48606979846954346, |
| "learning_rate": 3.356762598521874e-05, |
| "loss": 0.0335, |
| "step": 11900 |
| }, |
| { |
| "epoch": 11.418983700862896, |
| "grad_norm": 0.5852661728858948, |
| "learning_rate": 3.348546018073662e-05, |
| "loss": 0.0433, |
| "step": 11910 |
| }, |
| { |
| "epoch": 11.428571428571429, |
| "grad_norm": 0.252837598323822, |
| "learning_rate": 3.340334440408846e-05, |
| "loss": 0.0257, |
| "step": 11920 |
| }, |
| { |
| "epoch": 11.438159156279962, |
| "grad_norm": 0.2573808431625366, |
| "learning_rate": 3.332127890402926e-05, |
| "loss": 0.0331, |
| "step": 11930 |
| }, |
| { |
| "epoch": 11.447746883988495, |
| "grad_norm": 0.25154879689216614, |
| "learning_rate": 3.3239263929161734e-05, |
| "loss": 0.0389, |
| "step": 11940 |
| }, |
| { |
| "epoch": 11.457334611697028, |
| "grad_norm": 0.2564004957675934, |
| "learning_rate": 3.315729972793553e-05, |
| "loss": 0.0386, |
| "step": 11950 |
| }, |
| { |
| "epoch": 11.466922339405562, |
| "grad_norm": 0.45886269211769104, |
| "learning_rate": 3.307538654864645e-05, |
| "loss": 0.0365, |
| "step": 11960 |
| }, |
| { |
| "epoch": 11.476510067114093, |
| "grad_norm": 0.157767191529274, |
| "learning_rate": 3.29935246394358e-05, |
| "loss": 0.0356, |
| "step": 11970 |
| }, |
| { |
| "epoch": 11.486097794822626, |
| "grad_norm": 0.3403734564781189, |
| "learning_rate": 3.2911714248289525e-05, |
| "loss": 0.0335, |
| "step": 11980 |
| }, |
| { |
| "epoch": 11.49568552253116, |
| "grad_norm": 0.207637757062912, |
| "learning_rate": 3.282995562303754e-05, |
| "loss": 0.0291, |
| "step": 11990 |
| }, |
| { |
| "epoch": 11.505273250239693, |
| "grad_norm": 0.2571353614330292, |
| "learning_rate": 3.2748249011352864e-05, |
| "loss": 0.031, |
| "step": 12000 |
| }, |
| { |
| "epoch": 11.514860977948226, |
| "grad_norm": 0.29838424921035767, |
| "learning_rate": 3.266659466075108e-05, |
| "loss": 0.0312, |
| "step": 12010 |
| }, |
| { |
| "epoch": 11.52444870565676, |
| "grad_norm": 0.35853999853134155, |
| "learning_rate": 3.258499281858936e-05, |
| "loss": 0.0349, |
| "step": 12020 |
| }, |
| { |
| "epoch": 11.534036433365292, |
| "grad_norm": 0.22435715794563293, |
| "learning_rate": 3.250344373206584e-05, |
| "loss": 0.0321, |
| "step": 12030 |
| }, |
| { |
| "epoch": 11.543624161073826, |
| "grad_norm": 0.364653617143631, |
| "learning_rate": 3.242194764821881e-05, |
| "loss": 0.0291, |
| "step": 12040 |
| }, |
| { |
| "epoch": 11.553211888782359, |
| "grad_norm": 0.20518454909324646, |
| "learning_rate": 3.2340504813926086e-05, |
| "loss": 0.0335, |
| "step": 12050 |
| }, |
| { |
| "epoch": 11.562799616490892, |
| "grad_norm": 0.3099921941757202, |
| "learning_rate": 3.2259115475904064e-05, |
| "loss": 0.036, |
| "step": 12060 |
| }, |
| { |
| "epoch": 11.572387344199425, |
| "grad_norm": 0.40152508020401, |
| "learning_rate": 3.217777988070715e-05, |
| "loss": 0.0377, |
| "step": 12070 |
| }, |
| { |
| "epoch": 11.581975071907959, |
| "grad_norm": 0.2941493093967438, |
| "learning_rate": 3.2096498274726925e-05, |
| "loss": 0.0304, |
| "step": 12080 |
| }, |
| { |
| "epoch": 11.59156279961649, |
| "grad_norm": 0.1939501017332077, |
| "learning_rate": 3.201527090419144e-05, |
| "loss": 0.0309, |
| "step": 12090 |
| }, |
| { |
| "epoch": 11.601150527325023, |
| "grad_norm": 0.28782132267951965, |
| "learning_rate": 3.193409801516443e-05, |
| "loss": 0.0368, |
| "step": 12100 |
| }, |
| { |
| "epoch": 11.610738255033556, |
| "grad_norm": 0.22255367040634155, |
| "learning_rate": 3.1852979853544575e-05, |
| "loss": 0.034, |
| "step": 12110 |
| }, |
| { |
| "epoch": 11.62032598274209, |
| "grad_norm": 0.24580125510692596, |
| "learning_rate": 3.177191666506479e-05, |
| "loss": 0.0316, |
| "step": 12120 |
| }, |
| { |
| "epoch": 11.629913710450623, |
| "grad_norm": 0.16919176280498505, |
| "learning_rate": 3.169090869529146e-05, |
| "loss": 0.032, |
| "step": 12130 |
| }, |
| { |
| "epoch": 11.639501438159156, |
| "grad_norm": 0.16586647927761078, |
| "learning_rate": 3.1609956189623704e-05, |
| "loss": 0.0318, |
| "step": 12140 |
| }, |
| { |
| "epoch": 11.64908916586769, |
| "grad_norm": 0.25521326065063477, |
| "learning_rate": 3.1529059393292573e-05, |
| "loss": 0.0339, |
| "step": 12150 |
| }, |
| { |
| "epoch": 11.658676893576223, |
| "grad_norm": 0.40948987007141113, |
| "learning_rate": 3.1448218551360394e-05, |
| "loss": 0.0417, |
| "step": 12160 |
| }, |
| { |
| "epoch": 11.668264621284756, |
| "grad_norm": 0.2603534460067749, |
| "learning_rate": 3.136743390872001e-05, |
| "loss": 0.0332, |
| "step": 12170 |
| }, |
| { |
| "epoch": 11.677852348993289, |
| "grad_norm": 0.24372372031211853, |
| "learning_rate": 3.128670571009399e-05, |
| "loss": 0.0325, |
| "step": 12180 |
| }, |
| { |
| "epoch": 11.687440076701822, |
| "grad_norm": 0.18494637310504913, |
| "learning_rate": 3.1206034200033904e-05, |
| "loss": 0.0324, |
| "step": 12190 |
| }, |
| { |
| "epoch": 11.697027804410356, |
| "grad_norm": 0.3946174681186676, |
| "learning_rate": 3.1125419622919614e-05, |
| "loss": 0.0327, |
| "step": 12200 |
| }, |
| { |
| "epoch": 11.706615532118889, |
| "grad_norm": 0.5735461115837097, |
| "learning_rate": 3.104486222295853e-05, |
| "loss": 0.0294, |
| "step": 12210 |
| }, |
| { |
| "epoch": 11.71620325982742, |
| "grad_norm": 0.25579607486724854, |
| "learning_rate": 3.096436224418482e-05, |
| "loss": 0.0347, |
| "step": 12220 |
| }, |
| { |
| "epoch": 11.725790987535953, |
| "grad_norm": 0.40547341108322144, |
| "learning_rate": 3.088391993045873e-05, |
| "loss": 0.037, |
| "step": 12230 |
| }, |
| { |
| "epoch": 11.735378715244487, |
| "grad_norm": 0.3765973746776581, |
| "learning_rate": 3.080353552546578e-05, |
| "loss": 0.0307, |
| "step": 12240 |
| }, |
| { |
| "epoch": 11.74496644295302, |
| "grad_norm": 0.40163904428482056, |
| "learning_rate": 3.0723209272716124e-05, |
| "loss": 0.0295, |
| "step": 12250 |
| }, |
| { |
| "epoch": 11.754554170661553, |
| "grad_norm": 0.3667445182800293, |
| "learning_rate": 3.064294141554372e-05, |
| "loss": 0.0328, |
| "step": 12260 |
| }, |
| { |
| "epoch": 11.764141898370086, |
| "grad_norm": 0.22410856187343597, |
| "learning_rate": 3.056273219710565e-05, |
| "loss": 0.0355, |
| "step": 12270 |
| }, |
| { |
| "epoch": 11.77372962607862, |
| "grad_norm": 0.278154581785202, |
| "learning_rate": 3.048258186038129e-05, |
| "loss": 0.038, |
| "step": 12280 |
| }, |
| { |
| "epoch": 11.783317353787153, |
| "grad_norm": 0.4203621745109558, |
| "learning_rate": 3.040249064817176e-05, |
| "loss": 0.0338, |
| "step": 12290 |
| }, |
| { |
| "epoch": 11.792905081495686, |
| "grad_norm": 0.29441940784454346, |
| "learning_rate": 3.0322458803098973e-05, |
| "loss": 0.027, |
| "step": 12300 |
| }, |
| { |
| "epoch": 11.80249280920422, |
| "grad_norm": 0.2775827646255493, |
| "learning_rate": 3.0242486567605068e-05, |
| "loss": 0.031, |
| "step": 12310 |
| }, |
| { |
| "epoch": 11.812080536912752, |
| "grad_norm": 0.38520553708076477, |
| "learning_rate": 3.016257418395152e-05, |
| "loss": 0.0333, |
| "step": 12320 |
| }, |
| { |
| "epoch": 11.821668264621284, |
| "grad_norm": 0.26599544286727905, |
| "learning_rate": 3.008272189421861e-05, |
| "loss": 0.0301, |
| "step": 12330 |
| }, |
| { |
| "epoch": 11.831255992329817, |
| "grad_norm": 0.22733962535858154, |
| "learning_rate": 3.0002929940304498e-05, |
| "loss": 0.0298, |
| "step": 12340 |
| }, |
| { |
| "epoch": 11.84084372003835, |
| "grad_norm": 0.27661770582199097, |
| "learning_rate": 2.992319856392457e-05, |
| "loss": 0.0342, |
| "step": 12350 |
| }, |
| { |
| "epoch": 11.850431447746884, |
| "grad_norm": 0.26731380820274353, |
| "learning_rate": 2.9843528006610733e-05, |
| "loss": 0.0295, |
| "step": 12360 |
| }, |
| { |
| "epoch": 11.860019175455417, |
| "grad_norm": 0.3973303437232971, |
| "learning_rate": 2.976391850971065e-05, |
| "loss": 0.0301, |
| "step": 12370 |
| }, |
| { |
| "epoch": 11.86960690316395, |
| "grad_norm": 0.3120301067829132, |
| "learning_rate": 2.968437031438698e-05, |
| "loss": 0.0348, |
| "step": 12380 |
| }, |
| { |
| "epoch": 11.879194630872483, |
| "grad_norm": 0.2932593524456024, |
| "learning_rate": 2.9604883661616702e-05, |
| "loss": 0.0308, |
| "step": 12390 |
| }, |
| { |
| "epoch": 11.888782358581016, |
| "grad_norm": 0.2067721039056778, |
| "learning_rate": 2.9525458792190365e-05, |
| "loss": 0.0323, |
| "step": 12400 |
| }, |
| { |
| "epoch": 11.89837008628955, |
| "grad_norm": 0.30877119302749634, |
| "learning_rate": 2.9446095946711367e-05, |
| "loss": 0.0336, |
| "step": 12410 |
| }, |
| { |
| "epoch": 11.907957813998083, |
| "grad_norm": 0.1372332125902176, |
| "learning_rate": 2.93667953655952e-05, |
| "loss": 0.0341, |
| "step": 12420 |
| }, |
| { |
| "epoch": 11.917545541706616, |
| "grad_norm": 0.2722005844116211, |
| "learning_rate": 2.9287557289068736e-05, |
| "loss": 0.0347, |
| "step": 12430 |
| }, |
| { |
| "epoch": 11.92713326941515, |
| "grad_norm": 0.35675281286239624, |
| "learning_rate": 2.9208381957169485e-05, |
| "loss": 0.0354, |
| "step": 12440 |
| }, |
| { |
| "epoch": 11.936720997123683, |
| "grad_norm": 0.4129658639431, |
| "learning_rate": 2.9129269609744935e-05, |
| "loss": 0.0235, |
| "step": 12450 |
| }, |
| { |
| "epoch": 11.946308724832214, |
| "grad_norm": 0.23059901595115662, |
| "learning_rate": 2.905022048645172e-05, |
| "loss": 0.0361, |
| "step": 12460 |
| }, |
| { |
| "epoch": 11.955896452540747, |
| "grad_norm": 0.20640157163143158, |
| "learning_rate": 2.8971234826754983e-05, |
| "loss": 0.0306, |
| "step": 12470 |
| }, |
| { |
| "epoch": 11.96548418024928, |
| "grad_norm": 0.27325066924095154, |
| "learning_rate": 2.8892312869927578e-05, |
| "loss": 0.033, |
| "step": 12480 |
| }, |
| { |
| "epoch": 11.975071907957814, |
| "grad_norm": 0.2237732708454132, |
| "learning_rate": 2.881345485504945e-05, |
| "loss": 0.0309, |
| "step": 12490 |
| }, |
| { |
| "epoch": 11.984659635666347, |
| "grad_norm": 0.2271834760904312, |
| "learning_rate": 2.8734661021006747e-05, |
| "loss": 0.0267, |
| "step": 12500 |
| }, |
| { |
| "epoch": 11.99424736337488, |
| "grad_norm": 0.27549734711647034, |
| "learning_rate": 2.8655931606491294e-05, |
| "loss": 0.0338, |
| "step": 12510 |
| }, |
| { |
| "epoch": 12.003835091083413, |
| "grad_norm": 0.19603657722473145, |
| "learning_rate": 2.8577266849999672e-05, |
| "loss": 0.0303, |
| "step": 12520 |
| }, |
| { |
| "epoch": 12.013422818791947, |
| "grad_norm": 0.1858394742012024, |
| "learning_rate": 2.849866698983267e-05, |
| "loss": 0.0255, |
| "step": 12530 |
| }, |
| { |
| "epoch": 12.02301054650048, |
| "grad_norm": 0.17287525534629822, |
| "learning_rate": 2.8420132264094468e-05, |
| "loss": 0.0297, |
| "step": 12540 |
| }, |
| { |
| "epoch": 12.032598274209013, |
| "grad_norm": 0.32775846123695374, |
| "learning_rate": 2.83416629106919e-05, |
| "loss": 0.0345, |
| "step": 12550 |
| }, |
| { |
| "epoch": 12.042186001917546, |
| "grad_norm": 0.17536644637584686, |
| "learning_rate": 2.8263259167333777e-05, |
| "loss": 0.0286, |
| "step": 12560 |
| }, |
| { |
| "epoch": 12.05177372962608, |
| "grad_norm": 0.18874387443065643, |
| "learning_rate": 2.818492127153018e-05, |
| "loss": 0.0293, |
| "step": 12570 |
| }, |
| { |
| "epoch": 12.06136145733461, |
| "grad_norm": 0.1686885803937912, |
| "learning_rate": 2.8106649460591716e-05, |
| "loss": 0.0302, |
| "step": 12580 |
| }, |
| { |
| "epoch": 12.070949185043144, |
| "grad_norm": 0.14021116495132446, |
| "learning_rate": 2.802844397162877e-05, |
| "loss": 0.0321, |
| "step": 12590 |
| }, |
| { |
| "epoch": 12.080536912751677, |
| "grad_norm": 0.32412388920783997, |
| "learning_rate": 2.7950305041550818e-05, |
| "loss": 0.0337, |
| "step": 12600 |
| }, |
| { |
| "epoch": 12.09012464046021, |
| "grad_norm": 0.2775496244430542, |
| "learning_rate": 2.7872232907065738e-05, |
| "loss": 0.0348, |
| "step": 12610 |
| }, |
| { |
| "epoch": 12.099712368168744, |
| "grad_norm": 0.20718041062355042, |
| "learning_rate": 2.7794227804679063e-05, |
| "loss": 0.0318, |
| "step": 12620 |
| }, |
| { |
| "epoch": 12.109300095877277, |
| "grad_norm": 0.14198093116283417, |
| "learning_rate": 2.7716289970693236e-05, |
| "loss": 0.0285, |
| "step": 12630 |
| }, |
| { |
| "epoch": 12.11888782358581, |
| "grad_norm": 0.23473426699638367, |
| "learning_rate": 2.7638419641206914e-05, |
| "loss": 0.0311, |
| "step": 12640 |
| }, |
| { |
| "epoch": 12.128475551294343, |
| "grad_norm": 0.22687584161758423, |
| "learning_rate": 2.7560617052114297e-05, |
| "loss": 0.0265, |
| "step": 12650 |
| }, |
| { |
| "epoch": 12.138063279002877, |
| "grad_norm": 0.22875012457370758, |
| "learning_rate": 2.7482882439104385e-05, |
| "loss": 0.0324, |
| "step": 12660 |
| }, |
| { |
| "epoch": 12.14765100671141, |
| "grad_norm": 0.2869175970554352, |
| "learning_rate": 2.740521603766022e-05, |
| "loss": 0.0343, |
| "step": 12670 |
| }, |
| { |
| "epoch": 12.157238734419943, |
| "grad_norm": 0.24454490840435028, |
| "learning_rate": 2.7327618083058192e-05, |
| "loss": 0.0354, |
| "step": 12680 |
| }, |
| { |
| "epoch": 12.166826462128476, |
| "grad_norm": 0.26888319849967957, |
| "learning_rate": 2.7250088810367404e-05, |
| "loss": 0.0317, |
| "step": 12690 |
| }, |
| { |
| "epoch": 12.176414189837008, |
| "grad_norm": 0.2190038412809372, |
| "learning_rate": 2.7172628454448888e-05, |
| "loss": 0.0394, |
| "step": 12700 |
| }, |
| { |
| "epoch": 12.186001917545541, |
| "grad_norm": 0.1673816591501236, |
| "learning_rate": 2.7095237249954875e-05, |
| "loss": 0.0272, |
| "step": 12710 |
| }, |
| { |
| "epoch": 12.195589645254074, |
| "grad_norm": 0.32721394300460815, |
| "learning_rate": 2.7017915431328078e-05, |
| "loss": 0.0341, |
| "step": 12720 |
| }, |
| { |
| "epoch": 12.205177372962607, |
| "grad_norm": 0.2936406135559082, |
| "learning_rate": 2.6940663232801144e-05, |
| "loss": 0.0294, |
| "step": 12730 |
| }, |
| { |
| "epoch": 12.21476510067114, |
| "grad_norm": 3.8611295223236084, |
| "learning_rate": 2.6863480888395714e-05, |
| "loss": 0.0293, |
| "step": 12740 |
| }, |
| { |
| "epoch": 12.224352828379674, |
| "grad_norm": 0.16587217152118683, |
| "learning_rate": 2.6786368631921836e-05, |
| "loss": 0.03, |
| "step": 12750 |
| }, |
| { |
| "epoch": 12.233940556088207, |
| "grad_norm": 0.5451092720031738, |
| "learning_rate": 2.6709326696977215e-05, |
| "loss": 0.0325, |
| "step": 12760 |
| }, |
| { |
| "epoch": 12.24352828379674, |
| "grad_norm": 0.20002365112304688, |
| "learning_rate": 2.6632355316946643e-05, |
| "loss": 0.0255, |
| "step": 12770 |
| }, |
| { |
| "epoch": 12.253116011505274, |
| "grad_norm": 0.8898112773895264, |
| "learning_rate": 2.655545472500105e-05, |
| "loss": 0.0348, |
| "step": 12780 |
| }, |
| { |
| "epoch": 12.262703739213807, |
| "grad_norm": 0.3279706835746765, |
| "learning_rate": 2.647862515409697e-05, |
| "loss": 0.0259, |
| "step": 12790 |
| }, |
| { |
| "epoch": 12.27229146692234, |
| "grad_norm": 0.2899661958217621, |
| "learning_rate": 2.6401866836975795e-05, |
| "loss": 0.0375, |
| "step": 12800 |
| }, |
| { |
| "epoch": 12.281879194630873, |
| "grad_norm": 0.2332329899072647, |
| "learning_rate": 2.632518000616312e-05, |
| "loss": 0.0319, |
| "step": 12810 |
| }, |
| { |
| "epoch": 12.291466922339406, |
| "grad_norm": 0.23844292759895325, |
| "learning_rate": 2.6248564893967886e-05, |
| "loss": 0.0344, |
| "step": 12820 |
| }, |
| { |
| "epoch": 12.301054650047938, |
| "grad_norm": 0.20757047832012177, |
| "learning_rate": 2.617202173248181e-05, |
| "loss": 0.0365, |
| "step": 12830 |
| }, |
| { |
| "epoch": 12.310642377756471, |
| "grad_norm": 0.23326794803142548, |
| "learning_rate": 2.609555075357869e-05, |
| "loss": 0.0385, |
| "step": 12840 |
| }, |
| { |
| "epoch": 12.320230105465004, |
| "grad_norm": 0.20900526642799377, |
| "learning_rate": 2.6019152188913638e-05, |
| "loss": 0.0333, |
| "step": 12850 |
| }, |
| { |
| "epoch": 12.329817833173538, |
| "grad_norm": 0.2453479766845703, |
| "learning_rate": 2.5942826269922376e-05, |
| "loss": 0.0317, |
| "step": 12860 |
| }, |
| { |
| "epoch": 12.33940556088207, |
| "grad_norm": 0.45544683933258057, |
| "learning_rate": 2.5866573227820557e-05, |
| "loss": 0.0299, |
| "step": 12870 |
| }, |
| { |
| "epoch": 12.348993288590604, |
| "grad_norm": 0.31227871775627136, |
| "learning_rate": 2.5790393293603097e-05, |
| "loss": 0.029, |
| "step": 12880 |
| }, |
| { |
| "epoch": 12.358581016299137, |
| "grad_norm": 0.32639333605766296, |
| "learning_rate": 2.571428669804346e-05, |
| "loss": 0.0323, |
| "step": 12890 |
| }, |
| { |
| "epoch": 12.36816874400767, |
| "grad_norm": 0.3351771831512451, |
| "learning_rate": 2.563825367169289e-05, |
| "loss": 0.0304, |
| "step": 12900 |
| }, |
| { |
| "epoch": 12.377756471716204, |
| "grad_norm": 0.47458702325820923, |
| "learning_rate": 2.5562294444879787e-05, |
| "loss": 0.03, |
| "step": 12910 |
| }, |
| { |
| "epoch": 12.387344199424737, |
| "grad_norm": 0.2465980499982834, |
| "learning_rate": 2.5486409247708987e-05, |
| "loss": 0.0378, |
| "step": 12920 |
| }, |
| { |
| "epoch": 12.39693192713327, |
| "grad_norm": 0.42310255765914917, |
| "learning_rate": 2.5410598310061118e-05, |
| "loss": 0.0323, |
| "step": 12930 |
| }, |
| { |
| "epoch": 12.406519654841803, |
| "grad_norm": 1.066576361656189, |
| "learning_rate": 2.5334861861591753e-05, |
| "loss": 0.0347, |
| "step": 12940 |
| }, |
| { |
| "epoch": 12.416107382550335, |
| "grad_norm": 0.24553652107715607, |
| "learning_rate": 2.525920013173091e-05, |
| "loss": 0.0288, |
| "step": 12950 |
| }, |
| { |
| "epoch": 12.425695110258868, |
| "grad_norm": 0.17061471939086914, |
| "learning_rate": 2.51836133496822e-05, |
| "loss": 0.0293, |
| "step": 12960 |
| }, |
| { |
| "epoch": 12.435282837967401, |
| "grad_norm": 0.2702957093715668, |
| "learning_rate": 2.5108101744422197e-05, |
| "loss": 0.0337, |
| "step": 12970 |
| }, |
| { |
| "epoch": 12.444870565675934, |
| "grad_norm": 0.2967221736907959, |
| "learning_rate": 2.5032665544699762e-05, |
| "loss": 0.0388, |
| "step": 12980 |
| }, |
| { |
| "epoch": 12.454458293384468, |
| "grad_norm": 0.18429528176784515, |
| "learning_rate": 2.495730497903535e-05, |
| "loss": 0.0339, |
| "step": 12990 |
| }, |
| { |
| "epoch": 12.464046021093, |
| "grad_norm": 0.4446472227573395, |
| "learning_rate": 2.4882020275720247e-05, |
| "loss": 0.0297, |
| "step": 13000 |
| }, |
| { |
| "epoch": 12.473633748801534, |
| "grad_norm": 0.2481614649295807, |
| "learning_rate": 2.480681166281592e-05, |
| "loss": 0.0332, |
| "step": 13010 |
| }, |
| { |
| "epoch": 12.483221476510067, |
| "grad_norm": 0.4030400216579437, |
| "learning_rate": 2.4731679368153392e-05, |
| "loss": 0.0386, |
| "step": 13020 |
| }, |
| { |
| "epoch": 12.4928092042186, |
| "grad_norm": 0.20716169476509094, |
| "learning_rate": 2.4656623619332476e-05, |
| "loss": 0.0289, |
| "step": 13030 |
| }, |
| { |
| "epoch": 12.502396931927134, |
| "grad_norm": 0.18714624643325806, |
| "learning_rate": 2.4581644643721075e-05, |
| "loss": 0.0257, |
| "step": 13040 |
| }, |
| { |
| "epoch": 12.511984659635667, |
| "grad_norm": 0.2566820979118347, |
| "learning_rate": 2.4506742668454514e-05, |
| "loss": 0.0267, |
| "step": 13050 |
| }, |
| { |
| "epoch": 12.5215723873442, |
| "grad_norm": 0.237356036901474, |
| "learning_rate": 2.44319179204349e-05, |
| "loss": 0.0317, |
| "step": 13060 |
| }, |
| { |
| "epoch": 12.531160115052732, |
| "grad_norm": 0.29655054211616516, |
| "learning_rate": 2.4357170626330394e-05, |
| "loss": 0.0328, |
| "step": 13070 |
| }, |
| { |
| "epoch": 12.540747842761265, |
| "grad_norm": 0.29281550645828247, |
| "learning_rate": 2.4282501012574495e-05, |
| "loss": 0.0295, |
| "step": 13080 |
| }, |
| { |
| "epoch": 12.550335570469798, |
| "grad_norm": 0.477317750453949, |
| "learning_rate": 2.4207909305365363e-05, |
| "loss": 0.0353, |
| "step": 13090 |
| }, |
| { |
| "epoch": 12.559923298178331, |
| "grad_norm": 0.2606201767921448, |
| "learning_rate": 2.4133395730665214e-05, |
| "loss": 0.0288, |
| "step": 13100 |
| }, |
| { |
| "epoch": 12.569511025886865, |
| "grad_norm": 0.18180538713932037, |
| "learning_rate": 2.405896051419957e-05, |
| "loss": 0.0349, |
| "step": 13110 |
| }, |
| { |
| "epoch": 12.579098753595398, |
| "grad_norm": 0.3665505349636078, |
| "learning_rate": 2.398460388145653e-05, |
| "loss": 0.0321, |
| "step": 13120 |
| }, |
| { |
| "epoch": 12.588686481303931, |
| "grad_norm": 0.28408095240592957, |
| "learning_rate": 2.3910326057686127e-05, |
| "loss": 0.0359, |
| "step": 13130 |
| }, |
| { |
| "epoch": 12.598274209012464, |
| "grad_norm": 0.19122740626335144, |
| "learning_rate": 2.3836127267899778e-05, |
| "loss": 0.0299, |
| "step": 13140 |
| }, |
| { |
| "epoch": 12.607861936720997, |
| "grad_norm": 0.18212218582630157, |
| "learning_rate": 2.3762007736869353e-05, |
| "loss": 0.0328, |
| "step": 13150 |
| }, |
| { |
| "epoch": 12.61744966442953, |
| "grad_norm": 0.33118176460266113, |
| "learning_rate": 2.3687967689126667e-05, |
| "loss": 0.0291, |
| "step": 13160 |
| }, |
| { |
| "epoch": 12.627037392138064, |
| "grad_norm": 0.43079885840415955, |
| "learning_rate": 2.3614007348962724e-05, |
| "loss": 0.0303, |
| "step": 13170 |
| }, |
| { |
| "epoch": 12.636625119846597, |
| "grad_norm": 0.21110649406909943, |
| "learning_rate": 2.3540126940427166e-05, |
| "loss": 0.0334, |
| "step": 13180 |
| }, |
| { |
| "epoch": 12.64621284755513, |
| "grad_norm": 0.18830737471580505, |
| "learning_rate": 2.3466326687327396e-05, |
| "loss": 0.0316, |
| "step": 13190 |
| }, |
| { |
| "epoch": 12.655800575263662, |
| "grad_norm": 0.33135518431663513, |
| "learning_rate": 2.3392606813228008e-05, |
| "loss": 0.0375, |
| "step": 13200 |
| }, |
| { |
| "epoch": 12.665388302972195, |
| "grad_norm": 0.2647267282009125, |
| "learning_rate": 2.3318967541450153e-05, |
| "loss": 0.0294, |
| "step": 13210 |
| }, |
| { |
| "epoch": 12.674976030680728, |
| "grad_norm": 0.2796458303928375, |
| "learning_rate": 2.3245409095070803e-05, |
| "loss": 0.0282, |
| "step": 13220 |
| }, |
| { |
| "epoch": 12.684563758389261, |
| "grad_norm": 0.31999823451042175, |
| "learning_rate": 2.317193169692205e-05, |
| "loss": 0.0363, |
| "step": 13230 |
| }, |
| { |
| "epoch": 12.694151486097795, |
| "grad_norm": 0.21032322943210602, |
| "learning_rate": 2.3098535569590458e-05, |
| "loss": 0.0341, |
| "step": 13240 |
| }, |
| { |
| "epoch": 12.703739213806328, |
| "grad_norm": 0.31383687257766724, |
| "learning_rate": 2.3025220935416447e-05, |
| "loss": 0.0301, |
| "step": 13250 |
| }, |
| { |
| "epoch": 12.713326941514861, |
| "grad_norm": 0.4095149040222168, |
| "learning_rate": 2.2951988016493548e-05, |
| "loss": 0.036, |
| "step": 13260 |
| }, |
| { |
| "epoch": 12.722914669223394, |
| "grad_norm": 0.21426613628864288, |
| "learning_rate": 2.2878837034667737e-05, |
| "loss": 0.0346, |
| "step": 13270 |
| }, |
| { |
| "epoch": 12.732502396931928, |
| "grad_norm": 0.312098890542984, |
| "learning_rate": 2.2805768211536758e-05, |
| "loss": 0.0342, |
| "step": 13280 |
| }, |
| { |
| "epoch": 12.74209012464046, |
| "grad_norm": 0.2564839720726013, |
| "learning_rate": 2.273278176844951e-05, |
| "loss": 0.0323, |
| "step": 13290 |
| }, |
| { |
| "epoch": 12.751677852348994, |
| "grad_norm": 0.314685583114624, |
| "learning_rate": 2.2659877926505353e-05, |
| "loss": 0.0382, |
| "step": 13300 |
| }, |
| { |
| "epoch": 12.761265580057525, |
| "grad_norm": 0.1301986277103424, |
| "learning_rate": 2.2587056906553348e-05, |
| "loss": 0.034, |
| "step": 13310 |
| }, |
| { |
| "epoch": 12.770853307766059, |
| "grad_norm": 0.23595231771469116, |
| "learning_rate": 2.251431892919171e-05, |
| "loss": 0.0293, |
| "step": 13320 |
| }, |
| { |
| "epoch": 12.780441035474592, |
| "grad_norm": 0.23706960678100586, |
| "learning_rate": 2.2441664214767085e-05, |
| "loss": 0.0355, |
| "step": 13330 |
| }, |
| { |
| "epoch": 12.790028763183125, |
| "grad_norm": 0.20160214602947235, |
| "learning_rate": 2.2369092983373912e-05, |
| "loss": 0.0315, |
| "step": 13340 |
| }, |
| { |
| "epoch": 12.799616490891658, |
| "grad_norm": 0.1787547618150711, |
| "learning_rate": 2.2296605454853673e-05, |
| "loss": 0.0314, |
| "step": 13350 |
| }, |
| { |
| "epoch": 12.809204218600192, |
| "grad_norm": 0.36770564317703247, |
| "learning_rate": 2.222420184879437e-05, |
| "loss": 0.0372, |
| "step": 13360 |
| }, |
| { |
| "epoch": 12.818791946308725, |
| "grad_norm": 0.3025970160961151, |
| "learning_rate": 2.2151882384529683e-05, |
| "loss": 0.0255, |
| "step": 13370 |
| }, |
| { |
| "epoch": 12.828379674017258, |
| "grad_norm": 0.25169727206230164, |
| "learning_rate": 2.207964728113848e-05, |
| "loss": 0.0269, |
| "step": 13380 |
| }, |
| { |
| "epoch": 12.837967401725791, |
| "grad_norm": 0.37031155824661255, |
| "learning_rate": 2.200749675744402e-05, |
| "loss": 0.0293, |
| "step": 13390 |
| }, |
| { |
| "epoch": 12.847555129434324, |
| "grad_norm": 0.21579872071743011, |
| "learning_rate": 2.1935431032013388e-05, |
| "loss": 0.0302, |
| "step": 13400 |
| }, |
| { |
| "epoch": 12.857142857142858, |
| "grad_norm": 0.20838379859924316, |
| "learning_rate": 2.1863450323156725e-05, |
| "loss": 0.034, |
| "step": 13410 |
| }, |
| { |
| "epoch": 12.86673058485139, |
| "grad_norm": 0.2365337610244751, |
| "learning_rate": 2.179155484892671e-05, |
| "loss": 0.0321, |
| "step": 13420 |
| }, |
| { |
| "epoch": 12.876318312559924, |
| "grad_norm": 0.24535539746284485, |
| "learning_rate": 2.1719744827117737e-05, |
| "loss": 0.0318, |
| "step": 13430 |
| }, |
| { |
| "epoch": 12.885906040268456, |
| "grad_norm": 0.32186776399612427, |
| "learning_rate": 2.1648020475265418e-05, |
| "loss": 0.0353, |
| "step": 13440 |
| }, |
| { |
| "epoch": 12.895493767976989, |
| "grad_norm": 0.2927076518535614, |
| "learning_rate": 2.1576382010645764e-05, |
| "loss": 0.0318, |
| "step": 13450 |
| }, |
| { |
| "epoch": 12.905081495685522, |
| "grad_norm": 0.2444140613079071, |
| "learning_rate": 2.1504829650274672e-05, |
| "loss": 0.034, |
| "step": 13460 |
| }, |
| { |
| "epoch": 12.914669223394055, |
| "grad_norm": 0.17273946106433868, |
| "learning_rate": 2.1433363610907147e-05, |
| "loss": 0.0339, |
| "step": 13470 |
| }, |
| { |
| "epoch": 12.924256951102588, |
| "grad_norm": 0.3511595129966736, |
| "learning_rate": 2.1361984109036765e-05, |
| "loss": 0.0284, |
| "step": 13480 |
| }, |
| { |
| "epoch": 12.933844678811122, |
| "grad_norm": 0.21930259466171265, |
| "learning_rate": 2.1290691360894872e-05, |
| "loss": 0.0337, |
| "step": 13490 |
| }, |
| { |
| "epoch": 12.943432406519655, |
| "grad_norm": 0.13534465432167053, |
| "learning_rate": 2.121948558245008e-05, |
| "loss": 0.0325, |
| "step": 13500 |
| }, |
| { |
| "epoch": 12.953020134228188, |
| "grad_norm": 0.25757452845573425, |
| "learning_rate": 2.1148366989407496e-05, |
| "loss": 0.0344, |
| "step": 13510 |
| }, |
| { |
| "epoch": 12.962607861936721, |
| "grad_norm": 0.3126337230205536, |
| "learning_rate": 2.1077335797208153e-05, |
| "loss": 0.0266, |
| "step": 13520 |
| }, |
| { |
| "epoch": 12.972195589645255, |
| "grad_norm": 0.2144749015569687, |
| "learning_rate": 2.100639222102827e-05, |
| "loss": 0.0296, |
| "step": 13530 |
| }, |
| { |
| "epoch": 12.981783317353788, |
| "grad_norm": 0.33655446767807007, |
| "learning_rate": 2.0935536475778682e-05, |
| "loss": 0.0319, |
| "step": 13540 |
| }, |
| { |
| "epoch": 12.991371045062321, |
| "grad_norm": 0.16992558538913727, |
| "learning_rate": 2.0864768776104183e-05, |
| "loss": 0.0335, |
| "step": 13550 |
| }, |
| { |
| "epoch": 13.000958772770852, |
| "grad_norm": 0.2082756608724594, |
| "learning_rate": 2.079408933638279e-05, |
| "loss": 0.0338, |
| "step": 13560 |
| }, |
| { |
| "epoch": 13.010546500479386, |
| "grad_norm": 0.2862843871116638, |
| "learning_rate": 2.0723498370725162e-05, |
| "loss": 0.0289, |
| "step": 13570 |
| }, |
| { |
| "epoch": 13.020134228187919, |
| "grad_norm": 0.29127344489097595, |
| "learning_rate": 2.0652996092973974e-05, |
| "loss": 0.0379, |
| "step": 13580 |
| }, |
| { |
| "epoch": 13.029721955896452, |
| "grad_norm": 0.1825907677412033, |
| "learning_rate": 2.0582582716703243e-05, |
| "loss": 0.0267, |
| "step": 13590 |
| }, |
| { |
| "epoch": 13.039309683604985, |
| "grad_norm": 0.20657765865325928, |
| "learning_rate": 2.0512258455217636e-05, |
| "loss": 0.0337, |
| "step": 13600 |
| }, |
| { |
| "epoch": 13.048897411313519, |
| "grad_norm": 0.20046214759349823, |
| "learning_rate": 2.044202352155185e-05, |
| "loss": 0.0256, |
| "step": 13610 |
| }, |
| { |
| "epoch": 13.058485139022052, |
| "grad_norm": 0.23749665915966034, |
| "learning_rate": 2.0371878128470047e-05, |
| "loss": 0.033, |
| "step": 13620 |
| }, |
| { |
| "epoch": 13.068072866730585, |
| "grad_norm": 0.1981140673160553, |
| "learning_rate": 2.0301822488465106e-05, |
| "loss": 0.0323, |
| "step": 13630 |
| }, |
| { |
| "epoch": 13.077660594439118, |
| "grad_norm": 0.3064008951187134, |
| "learning_rate": 2.0231856813757995e-05, |
| "loss": 0.029, |
| "step": 13640 |
| }, |
| { |
| "epoch": 13.087248322147651, |
| "grad_norm": 0.3160218596458435, |
| "learning_rate": 2.016198131629716e-05, |
| "loss": 0.0317, |
| "step": 13650 |
| }, |
| { |
| "epoch": 13.096836049856185, |
| "grad_norm": 0.1925330013036728, |
| "learning_rate": 2.0092196207757886e-05, |
| "loss": 0.0308, |
| "step": 13660 |
| }, |
| { |
| "epoch": 13.106423777564718, |
| "grad_norm": 0.2060590237379074, |
| "learning_rate": 2.002250169954165e-05, |
| "loss": 0.0352, |
| "step": 13670 |
| }, |
| { |
| "epoch": 13.116011505273251, |
| "grad_norm": 0.21879933774471283, |
| "learning_rate": 1.9952898002775444e-05, |
| "loss": 0.0262, |
| "step": 13680 |
| }, |
| { |
| "epoch": 13.125599232981783, |
| "grad_norm": 0.22108188271522522, |
| "learning_rate": 1.9883385328311155e-05, |
| "loss": 0.0333, |
| "step": 13690 |
| }, |
| { |
| "epoch": 13.135186960690316, |
| "grad_norm": 0.26251569390296936, |
| "learning_rate": 1.981396388672496e-05, |
| "loss": 0.0314, |
| "step": 13700 |
| }, |
| { |
| "epoch": 13.144774688398849, |
| "grad_norm": 0.29389551281929016, |
| "learning_rate": 1.9744633888316684e-05, |
| "loss": 0.0333, |
| "step": 13710 |
| }, |
| { |
| "epoch": 13.154362416107382, |
| "grad_norm": 0.1754542887210846, |
| "learning_rate": 1.9675395543109087e-05, |
| "loss": 0.0306, |
| "step": 13720 |
| }, |
| { |
| "epoch": 13.163950143815915, |
| "grad_norm": 0.2529279589653015, |
| "learning_rate": 1.9606249060847275e-05, |
| "loss": 0.029, |
| "step": 13730 |
| }, |
| { |
| "epoch": 13.173537871524449, |
| "grad_norm": 0.25833970308303833, |
| "learning_rate": 1.9537194650998176e-05, |
| "loss": 0.0257, |
| "step": 13740 |
| }, |
| { |
| "epoch": 13.183125599232982, |
| "grad_norm": 0.2809722423553467, |
| "learning_rate": 1.9468232522749685e-05, |
| "loss": 0.03, |
| "step": 13750 |
| }, |
| { |
| "epoch": 13.192713326941515, |
| "grad_norm": 0.2745196521282196, |
| "learning_rate": 1.9399362885010186e-05, |
| "loss": 0.0259, |
| "step": 13760 |
| }, |
| { |
| "epoch": 13.202301054650048, |
| "grad_norm": 0.26047447323799133, |
| "learning_rate": 1.9330585946407896e-05, |
| "loss": 0.0293, |
| "step": 13770 |
| }, |
| { |
| "epoch": 13.211888782358582, |
| "grad_norm": 0.2309299260377884, |
| "learning_rate": 1.9261901915290222e-05, |
| "loss": 0.0263, |
| "step": 13780 |
| }, |
| { |
| "epoch": 13.221476510067115, |
| "grad_norm": 0.19574059545993805, |
| "learning_rate": 1.9193310999723086e-05, |
| "loss": 0.0256, |
| "step": 13790 |
| }, |
| { |
| "epoch": 13.231064237775648, |
| "grad_norm": 0.24411630630493164, |
| "learning_rate": 1.9124813407490345e-05, |
| "loss": 0.0266, |
| "step": 13800 |
| }, |
| { |
| "epoch": 13.24065196548418, |
| "grad_norm": 0.2317860871553421, |
| "learning_rate": 1.9056409346093167e-05, |
| "loss": 0.0362, |
| "step": 13810 |
| }, |
| { |
| "epoch": 13.250239693192713, |
| "grad_norm": 0.34288397431373596, |
| "learning_rate": 1.89880990227494e-05, |
| "loss": 0.031, |
| "step": 13820 |
| }, |
| { |
| "epoch": 13.259827420901246, |
| "grad_norm": 0.22115236520767212, |
| "learning_rate": 1.8919882644392894e-05, |
| "loss": 0.0303, |
| "step": 13830 |
| }, |
| { |
| "epoch": 13.269415148609779, |
| "grad_norm": 0.1675620973110199, |
| "learning_rate": 1.8851760417672897e-05, |
| "loss": 0.0267, |
| "step": 13840 |
| }, |
| { |
| "epoch": 13.279002876318312, |
| "grad_norm": 0.22504985332489014, |
| "learning_rate": 1.8783732548953487e-05, |
| "loss": 0.03, |
| "step": 13850 |
| }, |
| { |
| "epoch": 13.288590604026846, |
| "grad_norm": 0.2568277418613434, |
| "learning_rate": 1.87157992443129e-05, |
| "loss": 0.0347, |
| "step": 13860 |
| }, |
| { |
| "epoch": 13.298178331735379, |
| "grad_norm": 0.24830462038516998, |
| "learning_rate": 1.8647960709542866e-05, |
| "loss": 0.0313, |
| "step": 13870 |
| }, |
| { |
| "epoch": 13.307766059443912, |
| "grad_norm": 0.1982988864183426, |
| "learning_rate": 1.8580217150148034e-05, |
| "loss": 0.0286, |
| "step": 13880 |
| }, |
| { |
| "epoch": 13.317353787152445, |
| "grad_norm": 0.17509537935256958, |
| "learning_rate": 1.851256877134538e-05, |
| "loss": 0.0283, |
| "step": 13890 |
| }, |
| { |
| "epoch": 13.326941514860978, |
| "grad_norm": 0.27267399430274963, |
| "learning_rate": 1.8445015778063528e-05, |
| "loss": 0.0308, |
| "step": 13900 |
| }, |
| { |
| "epoch": 13.336529242569512, |
| "grad_norm": 0.2444014698266983, |
| "learning_rate": 1.8377558374942143e-05, |
| "loss": 0.0335, |
| "step": 13910 |
| }, |
| { |
| "epoch": 13.346116970278045, |
| "grad_norm": 0.4355910122394562, |
| "learning_rate": 1.831019676633129e-05, |
| "loss": 0.0326, |
| "step": 13920 |
| }, |
| { |
| "epoch": 13.355704697986576, |
| "grad_norm": 0.6526142954826355, |
| "learning_rate": 1.8242931156290893e-05, |
| "loss": 0.0299, |
| "step": 13930 |
| }, |
| { |
| "epoch": 13.36529242569511, |
| "grad_norm": 0.20145297050476074, |
| "learning_rate": 1.8175761748590063e-05, |
| "loss": 0.0315, |
| "step": 13940 |
| }, |
| { |
| "epoch": 13.374880153403643, |
| "grad_norm": 0.22952324151992798, |
| "learning_rate": 1.8108688746706427e-05, |
| "loss": 0.031, |
| "step": 13950 |
| }, |
| { |
| "epoch": 13.384467881112176, |
| "grad_norm": 0.38137954473495483, |
| "learning_rate": 1.8041712353825635e-05, |
| "loss": 0.0387, |
| "step": 13960 |
| }, |
| { |
| "epoch": 13.39405560882071, |
| "grad_norm": 0.2673424482345581, |
| "learning_rate": 1.7974832772840617e-05, |
| "loss": 0.0272, |
| "step": 13970 |
| }, |
| { |
| "epoch": 13.403643336529242, |
| "grad_norm": 0.2189689427614212, |
| "learning_rate": 1.790805020635109e-05, |
| "loss": 0.0317, |
| "step": 13980 |
| }, |
| { |
| "epoch": 13.413231064237776, |
| "grad_norm": 1.2192716598510742, |
| "learning_rate": 1.7841364856662824e-05, |
| "loss": 0.0258, |
| "step": 13990 |
| }, |
| { |
| "epoch": 13.422818791946309, |
| "grad_norm": 0.13329686224460602, |
| "learning_rate": 1.7774776925787136e-05, |
| "loss": 0.0257, |
| "step": 14000 |
| }, |
| { |
| "epoch": 13.432406519654842, |
| "grad_norm": 0.2741002142429352, |
| "learning_rate": 1.7708286615440183e-05, |
| "loss": 0.0271, |
| "step": 14010 |
| }, |
| { |
| "epoch": 13.441994247363375, |
| "grad_norm": 0.7737520337104797, |
| "learning_rate": 1.764189412704247e-05, |
| "loss": 0.0283, |
| "step": 14020 |
| }, |
| { |
| "epoch": 13.451581975071909, |
| "grad_norm": 0.24316097795963287, |
| "learning_rate": 1.7575599661718068e-05, |
| "loss": 0.0302, |
| "step": 14030 |
| }, |
| { |
| "epoch": 13.461169702780442, |
| "grad_norm": 0.23543784022331238, |
| "learning_rate": 1.7509403420294208e-05, |
| "loss": 0.0311, |
| "step": 14040 |
| }, |
| { |
| "epoch": 13.470757430488973, |
| "grad_norm": 0.19010919332504272, |
| "learning_rate": 1.7443305603300497e-05, |
| "loss": 0.0276, |
| "step": 14050 |
| }, |
| { |
| "epoch": 13.480345158197506, |
| "grad_norm": 0.1994113028049469, |
| "learning_rate": 1.7377306410968396e-05, |
| "loss": 0.0298, |
| "step": 14060 |
| }, |
| { |
| "epoch": 13.48993288590604, |
| "grad_norm": 0.30696478486061096, |
| "learning_rate": 1.731140604323063e-05, |
| "loss": 0.0275, |
| "step": 14070 |
| }, |
| { |
| "epoch": 13.499520613614573, |
| "grad_norm": 0.3128091096878052, |
| "learning_rate": 1.7245604699720535e-05, |
| "loss": 0.0272, |
| "step": 14080 |
| }, |
| { |
| "epoch": 13.509108341323106, |
| "grad_norm": 2.206577777862549, |
| "learning_rate": 1.7179902579771474e-05, |
| "loss": 0.0326, |
| "step": 14090 |
| }, |
| { |
| "epoch": 13.51869606903164, |
| "grad_norm": 0.18835577368736267, |
| "learning_rate": 1.711429988241619e-05, |
| "loss": 0.0276, |
| "step": 14100 |
| }, |
| { |
| "epoch": 13.528283796740173, |
| "grad_norm": 0.2255256026983261, |
| "learning_rate": 1.7048796806386304e-05, |
| "loss": 0.0301, |
| "step": 14110 |
| }, |
| { |
| "epoch": 13.537871524448706, |
| "grad_norm": 0.3144644796848297, |
| "learning_rate": 1.6983393550111648e-05, |
| "loss": 0.0324, |
| "step": 14120 |
| }, |
| { |
| "epoch": 13.547459252157239, |
| "grad_norm": 0.20487931370735168, |
| "learning_rate": 1.691809031171962e-05, |
| "loss": 0.0352, |
| "step": 14130 |
| }, |
| { |
| "epoch": 13.557046979865772, |
| "grad_norm": 0.22863590717315674, |
| "learning_rate": 1.6852887289034632e-05, |
| "loss": 0.0343, |
| "step": 14140 |
| }, |
| { |
| "epoch": 13.566634707574305, |
| "grad_norm": 0.30829718708992004, |
| "learning_rate": 1.67877846795776e-05, |
| "loss": 0.0342, |
| "step": 14150 |
| }, |
| { |
| "epoch": 13.576222435282839, |
| "grad_norm": 0.2026831954717636, |
| "learning_rate": 1.672278268056516e-05, |
| "loss": 0.0266, |
| "step": 14160 |
| }, |
| { |
| "epoch": 13.585810162991372, |
| "grad_norm": 0.18998700380325317, |
| "learning_rate": 1.6657881488909192e-05, |
| "loss": 0.0316, |
| "step": 14170 |
| }, |
| { |
| "epoch": 13.595397890699903, |
| "grad_norm": 0.2338184267282486, |
| "learning_rate": 1.659308130121622e-05, |
| "loss": 0.0315, |
| "step": 14180 |
| }, |
| { |
| "epoch": 13.604985618408437, |
| "grad_norm": 0.421129047870636, |
| "learning_rate": 1.6528382313786784e-05, |
| "loss": 0.0322, |
| "step": 14190 |
| }, |
| { |
| "epoch": 13.61457334611697, |
| "grad_norm": 0.28092893958091736, |
| "learning_rate": 1.6463784722614845e-05, |
| "loss": 0.0269, |
| "step": 14200 |
| }, |
| { |
| "epoch": 13.624161073825503, |
| "grad_norm": 0.19112944602966309, |
| "learning_rate": 1.6399288723387195e-05, |
| "loss": 0.0258, |
| "step": 14210 |
| }, |
| { |
| "epoch": 13.633748801534036, |
| "grad_norm": 0.286045640707016, |
| "learning_rate": 1.63348945114829e-05, |
| "loss": 0.0324, |
| "step": 14220 |
| }, |
| { |
| "epoch": 13.64333652924257, |
| "grad_norm": 0.280977338552475, |
| "learning_rate": 1.6270602281972686e-05, |
| "loss": 0.0265, |
| "step": 14230 |
| }, |
| { |
| "epoch": 13.652924256951103, |
| "grad_norm": 0.28009748458862305, |
| "learning_rate": 1.6206412229618307e-05, |
| "loss": 0.034, |
| "step": 14240 |
| }, |
| { |
| "epoch": 13.662511984659636, |
| "grad_norm": 0.2950078845024109, |
| "learning_rate": 1.6142324548871978e-05, |
| "loss": 0.0332, |
| "step": 14250 |
| }, |
| { |
| "epoch": 13.67209971236817, |
| "grad_norm": 0.19593513011932373, |
| "learning_rate": 1.607833943387585e-05, |
| "loss": 0.0322, |
| "step": 14260 |
| }, |
| { |
| "epoch": 13.681687440076702, |
| "grad_norm": 0.3256717026233673, |
| "learning_rate": 1.6014457078461353e-05, |
| "loss": 0.0311, |
| "step": 14270 |
| }, |
| { |
| "epoch": 13.691275167785236, |
| "grad_norm": 0.48480740189552307, |
| "learning_rate": 1.59506776761486e-05, |
| "loss": 0.0265, |
| "step": 14280 |
| }, |
| { |
| "epoch": 13.700862895493769, |
| "grad_norm": 0.17794422805309296, |
| "learning_rate": 1.588700142014583e-05, |
| "loss": 0.0302, |
| "step": 14290 |
| }, |
| { |
| "epoch": 13.7104506232023, |
| "grad_norm": 0.21641989052295685, |
| "learning_rate": 1.5823428503348846e-05, |
| "loss": 0.0269, |
| "step": 14300 |
| }, |
| { |
| "epoch": 13.720038350910833, |
| "grad_norm": 0.21487939357757568, |
| "learning_rate": 1.57599591183404e-05, |
| "loss": 0.0333, |
| "step": 14310 |
| }, |
| { |
| "epoch": 13.729626078619367, |
| "grad_norm": 0.20198583602905273, |
| "learning_rate": 1.569659345738959e-05, |
| "loss": 0.0316, |
| "step": 14320 |
| }, |
| { |
| "epoch": 13.7392138063279, |
| "grad_norm": 0.24818021059036255, |
| "learning_rate": 1.5633331712451287e-05, |
| "loss": 0.0322, |
| "step": 14330 |
| }, |
| { |
| "epoch": 13.748801534036433, |
| "grad_norm": 0.3211008906364441, |
| "learning_rate": 1.5570174075165617e-05, |
| "loss": 0.0286, |
| "step": 14340 |
| }, |
| { |
| "epoch": 13.758389261744966, |
| "grad_norm": 0.27913060784339905, |
| "learning_rate": 1.5507120736857316e-05, |
| "loss": 0.0309, |
| "step": 14350 |
| }, |
| { |
| "epoch": 13.7679769894535, |
| "grad_norm": 0.3094828724861145, |
| "learning_rate": 1.5444171888535127e-05, |
| "loss": 0.0262, |
| "step": 14360 |
| }, |
| { |
| "epoch": 13.777564717162033, |
| "grad_norm": 0.26376375555992126, |
| "learning_rate": 1.538132772089131e-05, |
| "loss": 0.0312, |
| "step": 14370 |
| }, |
| { |
| "epoch": 13.787152444870566, |
| "grad_norm": 0.27103152871131897, |
| "learning_rate": 1.531858842430096e-05, |
| "loss": 0.029, |
| "step": 14380 |
| }, |
| { |
| "epoch": 13.7967401725791, |
| "grad_norm": 0.2528936564922333, |
| "learning_rate": 1.5255954188821554e-05, |
| "loss": 0.0302, |
| "step": 14390 |
| }, |
| { |
| "epoch": 13.806327900287632, |
| "grad_norm": 0.2022869884967804, |
| "learning_rate": 1.519342520419223e-05, |
| "loss": 0.028, |
| "step": 14400 |
| }, |
| { |
| "epoch": 13.815915627996166, |
| "grad_norm": 0.2736548185348511, |
| "learning_rate": 1.5131001659833349e-05, |
| "loss": 0.0391, |
| "step": 14410 |
| }, |
| { |
| "epoch": 13.825503355704697, |
| "grad_norm": 0.20340123772621155, |
| "learning_rate": 1.5068683744845802e-05, |
| "loss": 0.0259, |
| "step": 14420 |
| }, |
| { |
| "epoch": 13.83509108341323, |
| "grad_norm": 0.30253875255584717, |
| "learning_rate": 1.5006471648010567e-05, |
| "loss": 0.0318, |
| "step": 14430 |
| }, |
| { |
| "epoch": 13.844678811121764, |
| "grad_norm": 0.18290819227695465, |
| "learning_rate": 1.4944365557787982e-05, |
| "loss": 0.0266, |
| "step": 14440 |
| }, |
| { |
| "epoch": 13.854266538830297, |
| "grad_norm": 0.17378397285938263, |
| "learning_rate": 1.4882365662317338e-05, |
| "loss": 0.0307, |
| "step": 14450 |
| }, |
| { |
| "epoch": 13.86385426653883, |
| "grad_norm": 0.17450757324695587, |
| "learning_rate": 1.4820472149416154e-05, |
| "loss": 0.0375, |
| "step": 14460 |
| }, |
| { |
| "epoch": 13.873441994247363, |
| "grad_norm": 0.17673359811306, |
| "learning_rate": 1.4758685206579754e-05, |
| "loss": 0.0336, |
| "step": 14470 |
| }, |
| { |
| "epoch": 13.883029721955896, |
| "grad_norm": 0.17782671749591827, |
| "learning_rate": 1.4697005020980547e-05, |
| "loss": 0.0264, |
| "step": 14480 |
| }, |
| { |
| "epoch": 13.89261744966443, |
| "grad_norm": 0.22997714579105377, |
| "learning_rate": 1.4635431779467628e-05, |
| "loss": 0.0364, |
| "step": 14490 |
| }, |
| { |
| "epoch": 13.902205177372963, |
| "grad_norm": 0.23629331588745117, |
| "learning_rate": 1.4573965668566037e-05, |
| "loss": 0.0293, |
| "step": 14500 |
| }, |
| { |
| "epoch": 13.911792905081496, |
| "grad_norm": 0.2348259836435318, |
| "learning_rate": 1.4512606874476348e-05, |
| "loss": 0.0296, |
| "step": 14510 |
| }, |
| { |
| "epoch": 13.92138063279003, |
| "grad_norm": 0.2225087732076645, |
| "learning_rate": 1.4451355583074027e-05, |
| "loss": 0.0286, |
| "step": 14520 |
| }, |
| { |
| "epoch": 13.930968360498563, |
| "grad_norm": 0.23287685215473175, |
| "learning_rate": 1.4390211979908847e-05, |
| "loss": 0.0279, |
| "step": 14530 |
| }, |
| { |
| "epoch": 13.940556088207096, |
| "grad_norm": 0.19362808763980865, |
| "learning_rate": 1.4329176250204369e-05, |
| "loss": 0.0334, |
| "step": 14540 |
| }, |
| { |
| "epoch": 13.950143815915627, |
| "grad_norm": 0.25659292936325073, |
| "learning_rate": 1.4268248578857384e-05, |
| "loss": 0.0286, |
| "step": 14550 |
| }, |
| { |
| "epoch": 13.95973154362416, |
| "grad_norm": 0.19965949654579163, |
| "learning_rate": 1.4207429150437368e-05, |
| "loss": 0.0336, |
| "step": 14560 |
| }, |
| { |
| "epoch": 13.969319271332694, |
| "grad_norm": 0.21127323806285858, |
| "learning_rate": 1.4146718149185833e-05, |
| "loss": 0.0311, |
| "step": 14570 |
| }, |
| { |
| "epoch": 13.978906999041227, |
| "grad_norm": 0.2175043374300003, |
| "learning_rate": 1.408611575901585e-05, |
| "loss": 0.0232, |
| "step": 14580 |
| }, |
| { |
| "epoch": 13.98849472674976, |
| "grad_norm": 0.2855774462223053, |
| "learning_rate": 1.4025622163511498e-05, |
| "loss": 0.03, |
| "step": 14590 |
| }, |
| { |
| "epoch": 13.998082454458293, |
| "grad_norm": 0.27606961131095886, |
| "learning_rate": 1.3965237545927274e-05, |
| "loss": 0.0285, |
| "step": 14600 |
| }, |
| { |
| "epoch": 14.007670182166827, |
| "grad_norm": 0.20237654447555542, |
| "learning_rate": 1.3904962089187529e-05, |
| "loss": 0.0263, |
| "step": 14610 |
| }, |
| { |
| "epoch": 14.01725790987536, |
| "grad_norm": 0.17577792704105377, |
| "learning_rate": 1.3844795975885921e-05, |
| "loss": 0.028, |
| "step": 14620 |
| }, |
| { |
| "epoch": 14.026845637583893, |
| "grad_norm": 0.24930806457996368, |
| "learning_rate": 1.3784739388284911e-05, |
| "loss": 0.0308, |
| "step": 14630 |
| }, |
| { |
| "epoch": 14.036433365292426, |
| "grad_norm": 0.16480274498462677, |
| "learning_rate": 1.372479250831516e-05, |
| "loss": 0.0301, |
| "step": 14640 |
| }, |
| { |
| "epoch": 14.04602109300096, |
| "grad_norm": 0.20912165939807892, |
| "learning_rate": 1.3664955517574968e-05, |
| "loss": 0.0278, |
| "step": 14650 |
| }, |
| { |
| "epoch": 14.055608820709493, |
| "grad_norm": 0.3317655622959137, |
| "learning_rate": 1.3605228597329738e-05, |
| "loss": 0.0317, |
| "step": 14660 |
| }, |
| { |
| "epoch": 14.065196548418024, |
| "grad_norm": 0.240800142288208, |
| "learning_rate": 1.3545611928511475e-05, |
| "loss": 0.0352, |
| "step": 14670 |
| }, |
| { |
| "epoch": 14.074784276126557, |
| "grad_norm": 0.2574955224990845, |
| "learning_rate": 1.3486105691718187e-05, |
| "loss": 0.0272, |
| "step": 14680 |
| }, |
| { |
| "epoch": 14.08437200383509, |
| "grad_norm": 0.26954057812690735, |
| "learning_rate": 1.3426710067213322e-05, |
| "loss": 0.0309, |
| "step": 14690 |
| }, |
| { |
| "epoch": 14.093959731543624, |
| "grad_norm": 0.23546206951141357, |
| "learning_rate": 1.336742523492523e-05, |
| "loss": 0.0332, |
| "step": 14700 |
| }, |
| { |
| "epoch": 14.103547459252157, |
| "grad_norm": 0.2285180389881134, |
| "learning_rate": 1.3308251374446734e-05, |
| "loss": 0.0436, |
| "step": 14710 |
| }, |
| { |
| "epoch": 14.11313518696069, |
| "grad_norm": 0.22198130190372467, |
| "learning_rate": 1.324918866503439e-05, |
| "loss": 0.0283, |
| "step": 14720 |
| }, |
| { |
| "epoch": 14.122722914669223, |
| "grad_norm": 0.37202128767967224, |
| "learning_rate": 1.3190237285608076e-05, |
| "loss": 0.0296, |
| "step": 14730 |
| }, |
| { |
| "epoch": 14.132310642377757, |
| "grad_norm": 0.2728140652179718, |
| "learning_rate": 1.3131397414750385e-05, |
| "loss": 0.0313, |
| "step": 14740 |
| }, |
| { |
| "epoch": 14.14189837008629, |
| "grad_norm": 0.19201789796352386, |
| "learning_rate": 1.3072669230706197e-05, |
| "loss": 0.0315, |
| "step": 14750 |
| }, |
| { |
| "epoch": 14.151486097794823, |
| "grad_norm": 0.2704322040081024, |
| "learning_rate": 1.3014052911381974e-05, |
| "loss": 0.0279, |
| "step": 14760 |
| }, |
| { |
| "epoch": 14.161073825503356, |
| "grad_norm": 0.23162490129470825, |
| "learning_rate": 1.2955548634345327e-05, |
| "loss": 0.0288, |
| "step": 14770 |
| }, |
| { |
| "epoch": 14.17066155321189, |
| "grad_norm": 0.1527073085308075, |
| "learning_rate": 1.289715657682447e-05, |
| "loss": 0.0287, |
| "step": 14780 |
| }, |
| { |
| "epoch": 14.180249280920421, |
| "grad_norm": 0.48836442828178406, |
| "learning_rate": 1.2838876915707681e-05, |
| "loss": 0.0334, |
| "step": 14790 |
| }, |
| { |
| "epoch": 14.189837008628954, |
| "grad_norm": 0.22852776944637299, |
| "learning_rate": 1.2780709827542708e-05, |
| "loss": 0.0301, |
| "step": 14800 |
| }, |
| { |
| "epoch": 14.199424736337487, |
| "grad_norm": 1.632561445236206, |
| "learning_rate": 1.2722655488536294e-05, |
| "loss": 0.0296, |
| "step": 14810 |
| }, |
| { |
| "epoch": 14.20901246404602, |
| "grad_norm": 0.20910300314426422, |
| "learning_rate": 1.2664714074553652e-05, |
| "loss": 0.0277, |
| "step": 14820 |
| }, |
| { |
| "epoch": 14.218600191754554, |
| "grad_norm": 0.284138023853302, |
| "learning_rate": 1.260688576111791e-05, |
| "loss": 0.0275, |
| "step": 14830 |
| }, |
| { |
| "epoch": 14.228187919463087, |
| "grad_norm": 0.24799588322639465, |
| "learning_rate": 1.2549170723409549e-05, |
| "loss": 0.0291, |
| "step": 14840 |
| }, |
| { |
| "epoch": 14.23777564717162, |
| "grad_norm": 0.18639959394931793, |
| "learning_rate": 1.2491569136265896e-05, |
| "loss": 0.0284, |
| "step": 14850 |
| }, |
| { |
| "epoch": 14.247363374880154, |
| "grad_norm": 0.19724729657173157, |
| "learning_rate": 1.243408117418064e-05, |
| "loss": 0.0266, |
| "step": 14860 |
| }, |
| { |
| "epoch": 14.256951102588687, |
| "grad_norm": 0.1451575756072998, |
| "learning_rate": 1.2376707011303257e-05, |
| "loss": 0.0313, |
| "step": 14870 |
| }, |
| { |
| "epoch": 14.26653883029722, |
| "grad_norm": 0.13136418163776398, |
| "learning_rate": 1.2319446821438458e-05, |
| "loss": 0.0257, |
| "step": 14880 |
| }, |
| { |
| "epoch": 14.276126558005753, |
| "grad_norm": 0.212480828166008, |
| "learning_rate": 1.2262300778045693e-05, |
| "loss": 0.0309, |
| "step": 14890 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "grad_norm": 0.179280087351799, |
| "learning_rate": 1.220526905423866e-05, |
| "loss": 0.0334, |
| "step": 14900 |
| }, |
| { |
| "epoch": 14.29530201342282, |
| "grad_norm": 0.19260522723197937, |
| "learning_rate": 1.2148351822784748e-05, |
| "loss": 0.0321, |
| "step": 14910 |
| }, |
| { |
| "epoch": 14.304889741131351, |
| "grad_norm": 0.2079414278268814, |
| "learning_rate": 1.2091549256104457e-05, |
| "loss": 0.0314, |
| "step": 14920 |
| }, |
| { |
| "epoch": 14.314477468839884, |
| "grad_norm": 0.1942739635705948, |
| "learning_rate": 1.2034861526270996e-05, |
| "loss": 0.0307, |
| "step": 14930 |
| }, |
| { |
| "epoch": 14.324065196548418, |
| "grad_norm": 0.28928378224372864, |
| "learning_rate": 1.1978288805009641e-05, |
| "loss": 0.0267, |
| "step": 14940 |
| }, |
| { |
| "epoch": 14.33365292425695, |
| "grad_norm": 0.3712955415248871, |
| "learning_rate": 1.192183126369732e-05, |
| "loss": 0.0329, |
| "step": 14950 |
| }, |
| { |
| "epoch": 14.343240651965484, |
| "grad_norm": 0.22929075360298157, |
| "learning_rate": 1.1865489073361996e-05, |
| "loss": 0.0264, |
| "step": 14960 |
| }, |
| { |
| "epoch": 14.352828379674017, |
| "grad_norm": 0.31317007541656494, |
| "learning_rate": 1.1809262404682247e-05, |
| "loss": 0.0242, |
| "step": 14970 |
| }, |
| { |
| "epoch": 14.36241610738255, |
| "grad_norm": 0.5237254500389099, |
| "learning_rate": 1.1753151427986646e-05, |
| "loss": 0.0292, |
| "step": 14980 |
| }, |
| { |
| "epoch": 14.372003835091084, |
| "grad_norm": 0.21789228916168213, |
| "learning_rate": 1.169715631325336e-05, |
| "loss": 0.0314, |
| "step": 14990 |
| }, |
| { |
| "epoch": 14.381591562799617, |
| "grad_norm": 0.29379501938819885, |
| "learning_rate": 1.1641277230109492e-05, |
| "loss": 0.0332, |
| "step": 15000 |
| }, |
| { |
| "epoch": 14.39117929050815, |
| "grad_norm": 0.17771072685718536, |
| "learning_rate": 1.1585514347830738e-05, |
| "loss": 0.0267, |
| "step": 15010 |
| }, |
| { |
| "epoch": 14.400767018216683, |
| "grad_norm": 0.24794255197048187, |
| "learning_rate": 1.1529867835340707e-05, |
| "loss": 0.0267, |
| "step": 15020 |
| }, |
| { |
| "epoch": 14.410354745925215, |
| "grad_norm": 0.21468493342399597, |
| "learning_rate": 1.1474337861210543e-05, |
| "loss": 0.0267, |
| "step": 15030 |
| }, |
| { |
| "epoch": 14.419942473633748, |
| "grad_norm": 0.17512547969818115, |
| "learning_rate": 1.1418924593658314e-05, |
| "loss": 0.0239, |
| "step": 15040 |
| }, |
| { |
| "epoch": 14.429530201342281, |
| "grad_norm": 0.2626974284648895, |
| "learning_rate": 1.1363628200548593e-05, |
| "loss": 0.0328, |
| "step": 15050 |
| }, |
| { |
| "epoch": 14.439117929050814, |
| "grad_norm": 0.21883651614189148, |
| "learning_rate": 1.1308448849391846e-05, |
| "loss": 0.0283, |
| "step": 15060 |
| }, |
| { |
| "epoch": 14.448705656759348, |
| "grad_norm": 0.2517321705818176, |
| "learning_rate": 1.1253386707344044e-05, |
| "loss": 0.0319, |
| "step": 15070 |
| }, |
| { |
| "epoch": 14.458293384467881, |
| "grad_norm": 0.23790787160396576, |
| "learning_rate": 1.1198441941206033e-05, |
| "loss": 0.0254, |
| "step": 15080 |
| }, |
| { |
| "epoch": 14.467881112176414, |
| "grad_norm": 0.2755306363105774, |
| "learning_rate": 1.1143614717423145e-05, |
| "loss": 0.0297, |
| "step": 15090 |
| }, |
| { |
| "epoch": 14.477468839884947, |
| "grad_norm": 0.17343682050704956, |
| "learning_rate": 1.1088905202084604e-05, |
| "loss": 0.0271, |
| "step": 15100 |
| }, |
| { |
| "epoch": 14.48705656759348, |
| "grad_norm": 0.4037168323993683, |
| "learning_rate": 1.1034313560923032e-05, |
| "loss": 0.0318, |
| "step": 15110 |
| }, |
| { |
| "epoch": 14.496644295302014, |
| "grad_norm": 0.25027063488960266, |
| "learning_rate": 1.097983995931407e-05, |
| "loss": 0.0344, |
| "step": 15120 |
| }, |
| { |
| "epoch": 14.506232023010547, |
| "grad_norm": 0.2531662583351135, |
| "learning_rate": 1.0925484562275678e-05, |
| "loss": 0.0336, |
| "step": 15130 |
| }, |
| { |
| "epoch": 14.51581975071908, |
| "grad_norm": 0.27917400002479553, |
| "learning_rate": 1.0871247534467788e-05, |
| "loss": 0.0316, |
| "step": 15140 |
| }, |
| { |
| "epoch": 14.525407478427613, |
| "grad_norm": 0.26147523522377014, |
| "learning_rate": 1.0817129040191698e-05, |
| "loss": 0.0278, |
| "step": 15150 |
| }, |
| { |
| "epoch": 14.534995206136145, |
| "grad_norm": 0.24168430268764496, |
| "learning_rate": 1.076312924338973e-05, |
| "loss": 0.03, |
| "step": 15160 |
| }, |
| { |
| "epoch": 14.544582933844678, |
| "grad_norm": 0.17934760451316833, |
| "learning_rate": 1.0709248307644559e-05, |
| "loss": 0.0275, |
| "step": 15170 |
| }, |
| { |
| "epoch": 14.554170661553211, |
| "grad_norm": 0.38495177030563354, |
| "learning_rate": 1.0655486396178782e-05, |
| "loss": 0.0317, |
| "step": 15180 |
| }, |
| { |
| "epoch": 14.563758389261745, |
| "grad_norm": 0.22225984930992126, |
| "learning_rate": 1.0601843671854477e-05, |
| "loss": 0.0312, |
| "step": 15190 |
| }, |
| { |
| "epoch": 14.573346116970278, |
| "grad_norm": 0.29296278953552246, |
| "learning_rate": 1.0548320297172665e-05, |
| "loss": 0.0315, |
| "step": 15200 |
| }, |
| { |
| "epoch": 14.582933844678811, |
| "grad_norm": 0.3371207118034363, |
| "learning_rate": 1.0494916434272783e-05, |
| "loss": 0.0299, |
| "step": 15210 |
| }, |
| { |
| "epoch": 14.592521572387344, |
| "grad_norm": 0.220375657081604, |
| "learning_rate": 1.0441632244932237e-05, |
| "loss": 0.0265, |
| "step": 15220 |
| }, |
| { |
| "epoch": 14.602109300095877, |
| "grad_norm": 0.1987174153327942, |
| "learning_rate": 1.0388467890565928e-05, |
| "loss": 0.0261, |
| "step": 15230 |
| }, |
| { |
| "epoch": 14.61169702780441, |
| "grad_norm": 0.25363320112228394, |
| "learning_rate": 1.0335423532225735e-05, |
| "loss": 0.0301, |
| "step": 15240 |
| }, |
| { |
| "epoch": 14.621284755512944, |
| "grad_norm": 0.22231195867061615, |
| "learning_rate": 1.028249933060001e-05, |
| "loss": 0.0353, |
| "step": 15250 |
| }, |
| { |
| "epoch": 14.630872483221477, |
| "grad_norm": 0.20641197264194489, |
| "learning_rate": 1.022969544601311e-05, |
| "loss": 0.0254, |
| "step": 15260 |
| }, |
| { |
| "epoch": 14.64046021093001, |
| "grad_norm": 0.25588056445121765, |
| "learning_rate": 1.0177012038424927e-05, |
| "loss": 0.0327, |
| "step": 15270 |
| }, |
| { |
| "epoch": 14.650047938638544, |
| "grad_norm": 0.3196217715740204, |
| "learning_rate": 1.0124449267430414e-05, |
| "loss": 0.0306, |
| "step": 15280 |
| }, |
| { |
| "epoch": 14.659635666347075, |
| "grad_norm": 0.37711241841316223, |
| "learning_rate": 1.0072007292259029e-05, |
| "loss": 0.0314, |
| "step": 15290 |
| }, |
| { |
| "epoch": 14.669223394055608, |
| "grad_norm": 0.299496591091156, |
| "learning_rate": 1.0019686271774314e-05, |
| "loss": 0.0273, |
| "step": 15300 |
| }, |
| { |
| "epoch": 14.678811121764141, |
| "grad_norm": 0.20070233941078186, |
| "learning_rate": 9.967486364473416e-06, |
| "loss": 0.0348, |
| "step": 15310 |
| }, |
| { |
| "epoch": 14.688398849472675, |
| "grad_norm": 0.1786354035139084, |
| "learning_rate": 9.915407728486603e-06, |
| "loss": 0.0315, |
| "step": 15320 |
| }, |
| { |
| "epoch": 14.697986577181208, |
| "grad_norm": 0.19913482666015625, |
| "learning_rate": 9.863450521576729e-06, |
| "loss": 0.0332, |
| "step": 15330 |
| }, |
| { |
| "epoch": 14.707574304889741, |
| "grad_norm": 0.26217663288116455, |
| "learning_rate": 9.81161490113885e-06, |
| "loss": 0.0299, |
| "step": 15340 |
| }, |
| { |
| "epoch": 14.717162032598274, |
| "grad_norm": 0.17626221477985382, |
| "learning_rate": 9.759901024199642e-06, |
| "loss": 0.0258, |
| "step": 15350 |
| }, |
| { |
| "epoch": 14.726749760306808, |
| "grad_norm": 0.5230224132537842, |
| "learning_rate": 9.708309047417041e-06, |
| "loss": 0.0286, |
| "step": 15360 |
| }, |
| { |
| "epoch": 14.73633748801534, |
| "grad_norm": 0.19318176805973053, |
| "learning_rate": 9.656839127079659e-06, |
| "loss": 0.0254, |
| "step": 15370 |
| }, |
| { |
| "epoch": 14.745925215723874, |
| "grad_norm": 0.30321067571640015, |
| "learning_rate": 9.6054914191064e-06, |
| "loss": 0.0304, |
| "step": 15380 |
| }, |
| { |
| "epoch": 14.755512943432407, |
| "grad_norm": 0.2519323229789734, |
| "learning_rate": 9.554266079045909e-06, |
| "loss": 0.0325, |
| "step": 15390 |
| }, |
| { |
| "epoch": 14.765100671140939, |
| "grad_norm": 0.24592278897762299, |
| "learning_rate": 9.503163262076181e-06, |
| "loss": 0.0336, |
| "step": 15400 |
| }, |
| { |
| "epoch": 14.774688398849472, |
| "grad_norm": 0.19091877341270447, |
| "learning_rate": 9.452183123004e-06, |
| "loss": 0.0247, |
| "step": 15410 |
| }, |
| { |
| "epoch": 14.784276126558005, |
| "grad_norm": 0.26081383228302, |
| "learning_rate": 9.401325816264573e-06, |
| "loss": 0.0333, |
| "step": 15420 |
| }, |
| { |
| "epoch": 14.793863854266538, |
| "grad_norm": 0.27854666113853455, |
| "learning_rate": 9.350591495920952e-06, |
| "loss": 0.024, |
| "step": 15430 |
| }, |
| { |
| "epoch": 14.803451581975072, |
| "grad_norm": 0.36169877648353577, |
| "learning_rate": 9.299980315663686e-06, |
| "loss": 0.031, |
| "step": 15440 |
| }, |
| { |
| "epoch": 14.813039309683605, |
| "grad_norm": 0.18000735342502594, |
| "learning_rate": 9.24949242881023e-06, |
| "loss": 0.0289, |
| "step": 15450 |
| }, |
| { |
| "epoch": 14.822627037392138, |
| "grad_norm": 0.25608521699905396, |
| "learning_rate": 9.199127988304607e-06, |
| "loss": 0.0284, |
| "step": 15460 |
| }, |
| { |
| "epoch": 14.832214765100671, |
| "grad_norm": 0.2771013379096985, |
| "learning_rate": 9.148887146716812e-06, |
| "loss": 0.0283, |
| "step": 15470 |
| }, |
| { |
| "epoch": 14.841802492809204, |
| "grad_norm": 0.17078572511672974, |
| "learning_rate": 9.09877005624249e-06, |
| "loss": 0.0294, |
| "step": 15480 |
| }, |
| { |
| "epoch": 14.851390220517738, |
| "grad_norm": 0.17408467829227448, |
| "learning_rate": 9.048776868702347e-06, |
| "loss": 0.0255, |
| "step": 15490 |
| }, |
| { |
| "epoch": 14.860977948226271, |
| "grad_norm": 0.20527216792106628, |
| "learning_rate": 8.998907735541789e-06, |
| "loss": 0.0329, |
| "step": 15500 |
| }, |
| { |
| "epoch": 14.870565675934804, |
| "grad_norm": 0.23558159172534943, |
| "learning_rate": 8.94916280783038e-06, |
| "loss": 0.0294, |
| "step": 15510 |
| }, |
| { |
| "epoch": 14.880153403643337, |
| "grad_norm": 0.16163650155067444, |
| "learning_rate": 8.89954223626146e-06, |
| "loss": 0.0264, |
| "step": 15520 |
| }, |
| { |
| "epoch": 14.889741131351869, |
| "grad_norm": 0.2564382255077362, |
| "learning_rate": 8.850046171151666e-06, |
| "loss": 0.0332, |
| "step": 15530 |
| }, |
| { |
| "epoch": 14.899328859060402, |
| "grad_norm": 0.2050989419221878, |
| "learning_rate": 8.80067476244042e-06, |
| "loss": 0.0307, |
| "step": 15540 |
| }, |
| { |
| "epoch": 14.908916586768935, |
| "grad_norm": 0.18448740243911743, |
| "learning_rate": 8.751428159689528e-06, |
| "loss": 0.0306, |
| "step": 15550 |
| }, |
| { |
| "epoch": 14.918504314477468, |
| "grad_norm": 0.29133155941963196, |
| "learning_rate": 8.702306512082753e-06, |
| "loss": 0.0243, |
| "step": 15560 |
| }, |
| { |
| "epoch": 14.928092042186002, |
| "grad_norm": 0.141392782330513, |
| "learning_rate": 8.653309968425322e-06, |
| "loss": 0.0242, |
| "step": 15570 |
| }, |
| { |
| "epoch": 14.937679769894535, |
| "grad_norm": 0.21134333312511444, |
| "learning_rate": 8.60443867714345e-06, |
| "loss": 0.0318, |
| "step": 15580 |
| }, |
| { |
| "epoch": 14.947267497603068, |
| "grad_norm": 0.2590806484222412, |
| "learning_rate": 8.55569278628393e-06, |
| "loss": 0.0253, |
| "step": 15590 |
| }, |
| { |
| "epoch": 14.956855225311601, |
| "grad_norm": 0.21871857345104218, |
| "learning_rate": 8.507072443513702e-06, |
| "loss": 0.0258, |
| "step": 15600 |
| }, |
| { |
| "epoch": 14.966442953020135, |
| "grad_norm": 0.25187286734580994, |
| "learning_rate": 8.458577796119382e-06, |
| "loss": 0.03, |
| "step": 15610 |
| }, |
| { |
| "epoch": 14.976030680728668, |
| "grad_norm": 0.17888393998146057, |
| "learning_rate": 8.410208991006784e-06, |
| "loss": 0.0274, |
| "step": 15620 |
| }, |
| { |
| "epoch": 14.985618408437201, |
| "grad_norm": 0.1486871838569641, |
| "learning_rate": 8.361966174700514e-06, |
| "loss": 0.0269, |
| "step": 15630 |
| }, |
| { |
| "epoch": 14.995206136145734, |
| "grad_norm": 0.6585232019424438, |
| "learning_rate": 8.31384949334353e-06, |
| "loss": 0.0294, |
| "step": 15640 |
| }, |
| { |
| "epoch": 15.004793863854266, |
| "grad_norm": 0.36748427152633667, |
| "learning_rate": 8.265859092696686e-06, |
| "loss": 0.0318, |
| "step": 15650 |
| }, |
| { |
| "epoch": 15.014381591562799, |
| "grad_norm": 0.22082515060901642, |
| "learning_rate": 8.217995118138294e-06, |
| "loss": 0.0294, |
| "step": 15660 |
| }, |
| { |
| "epoch": 15.023969319271332, |
| "grad_norm": 0.1767498254776001, |
| "learning_rate": 8.170257714663642e-06, |
| "loss": 0.0275, |
| "step": 15670 |
| }, |
| { |
| "epoch": 15.033557046979865, |
| "grad_norm": 0.24185898900032043, |
| "learning_rate": 8.12264702688465e-06, |
| "loss": 0.0279, |
| "step": 15680 |
| }, |
| { |
| "epoch": 15.043144774688399, |
| "grad_norm": 0.22703923285007477, |
| "learning_rate": 8.075163199029357e-06, |
| "loss": 0.0268, |
| "step": 15690 |
| }, |
| { |
| "epoch": 15.052732502396932, |
| "grad_norm": 0.2051907479763031, |
| "learning_rate": 8.027806374941481e-06, |
| "loss": 0.0272, |
| "step": 15700 |
| }, |
| { |
| "epoch": 15.062320230105465, |
| "grad_norm": 0.24761435389518738, |
| "learning_rate": 7.980576698080005e-06, |
| "loss": 0.0301, |
| "step": 15710 |
| }, |
| { |
| "epoch": 15.071907957813998, |
| "grad_norm": 0.17438143491744995, |
| "learning_rate": 7.933474311518796e-06, |
| "loss": 0.0351, |
| "step": 15720 |
| }, |
| { |
| "epoch": 15.081495685522532, |
| "grad_norm": 0.20341135561466217, |
| "learning_rate": 7.88649935794606e-06, |
| "loss": 0.0264, |
| "step": 15730 |
| }, |
| { |
| "epoch": 15.091083413231065, |
| "grad_norm": 0.24047966301441193, |
| "learning_rate": 7.83965197966397e-06, |
| "loss": 0.0268, |
| "step": 15740 |
| }, |
| { |
| "epoch": 15.100671140939598, |
| "grad_norm": 0.19311171770095825, |
| "learning_rate": 7.792932318588264e-06, |
| "loss": 0.033, |
| "step": 15750 |
| }, |
| { |
| "epoch": 15.110258868648131, |
| "grad_norm": 0.18407687544822693, |
| "learning_rate": 7.746340516247779e-06, |
| "loss": 0.0243, |
| "step": 15760 |
| }, |
| { |
| "epoch": 15.119846596356663, |
| "grad_norm": 0.21947818994522095, |
| "learning_rate": 7.69987671378401e-06, |
| "loss": 0.0255, |
| "step": 15770 |
| }, |
| { |
| "epoch": 15.129434324065196, |
| "grad_norm": 0.4175131916999817, |
| "learning_rate": 7.653541051950692e-06, |
| "loss": 0.0245, |
| "step": 15780 |
| }, |
| { |
| "epoch": 15.139022051773729, |
| "grad_norm": 0.29046544432640076, |
| "learning_rate": 7.607333671113409e-06, |
| "loss": 0.0365, |
| "step": 15790 |
| }, |
| { |
| "epoch": 15.148609779482262, |
| "grad_norm": 0.25391921401023865, |
| "learning_rate": 7.561254711249127e-06, |
| "loss": 0.0266, |
| "step": 15800 |
| }, |
| { |
| "epoch": 15.158197507190796, |
| "grad_norm": 0.19595490396022797, |
| "learning_rate": 7.515304311945787e-06, |
| "loss": 0.0306, |
| "step": 15810 |
| }, |
| { |
| "epoch": 15.167785234899329, |
| "grad_norm": 0.1492607444524765, |
| "learning_rate": 7.469482612401857e-06, |
| "loss": 0.0306, |
| "step": 15820 |
| }, |
| { |
| "epoch": 15.177372962607862, |
| "grad_norm": 0.2468632310628891, |
| "learning_rate": 7.423789751425958e-06, |
| "loss": 0.0275, |
| "step": 15830 |
| }, |
| { |
| "epoch": 15.186960690316395, |
| "grad_norm": 0.20901519060134888, |
| "learning_rate": 7.378225867436428e-06, |
| "loss": 0.0252, |
| "step": 15840 |
| }, |
| { |
| "epoch": 15.196548418024928, |
| "grad_norm": 0.28785982728004456, |
| "learning_rate": 7.332791098460867e-06, |
| "loss": 0.0326, |
| "step": 15850 |
| }, |
| { |
| "epoch": 15.206136145733462, |
| "grad_norm": 0.2834322154521942, |
| "learning_rate": 7.287485582135728e-06, |
| "loss": 0.0302, |
| "step": 15860 |
| }, |
| { |
| "epoch": 15.215723873441995, |
| "grad_norm": 0.24561063945293427, |
| "learning_rate": 7.242309455705959e-06, |
| "loss": 0.0292, |
| "step": 15870 |
| }, |
| { |
| "epoch": 15.225311601150528, |
| "grad_norm": 0.23040306568145752, |
| "learning_rate": 7.197262856024539e-06, |
| "loss": 0.0246, |
| "step": 15880 |
| }, |
| { |
| "epoch": 15.234899328859061, |
| "grad_norm": 0.22045479714870453, |
| "learning_rate": 7.152345919552045e-06, |
| "loss": 0.0314, |
| "step": 15890 |
| }, |
| { |
| "epoch": 15.244487056567593, |
| "grad_norm": 0.2748197913169861, |
| "learning_rate": 7.107558782356255e-06, |
| "loss": 0.0292, |
| "step": 15900 |
| }, |
| { |
| "epoch": 15.254074784276126, |
| "grad_norm": 0.2709030210971832, |
| "learning_rate": 7.0629015801117744e-06, |
| "loss": 0.0299, |
| "step": 15910 |
| }, |
| { |
| "epoch": 15.26366251198466, |
| "grad_norm": 0.2666435241699219, |
| "learning_rate": 7.018374448099596e-06, |
| "loss": 0.0324, |
| "step": 15920 |
| }, |
| { |
| "epoch": 15.273250239693192, |
| "grad_norm": 0.32848596572875977, |
| "learning_rate": 6.973977521206654e-06, |
| "loss": 0.0344, |
| "step": 15930 |
| }, |
| { |
| "epoch": 15.282837967401726, |
| "grad_norm": 0.23068153858184814, |
| "learning_rate": 6.929710933925487e-06, |
| "loss": 0.0262, |
| "step": 15940 |
| }, |
| { |
| "epoch": 15.292425695110259, |
| "grad_norm": 0.24479450285434723, |
| "learning_rate": 6.885574820353752e-06, |
| "loss": 0.0269, |
| "step": 15950 |
| }, |
| { |
| "epoch": 15.302013422818792, |
| "grad_norm": 0.21294337511062622, |
| "learning_rate": 6.841569314193902e-06, |
| "loss": 0.0265, |
| "step": 15960 |
| }, |
| { |
| "epoch": 15.311601150527325, |
| "grad_norm": 0.28778862953186035, |
| "learning_rate": 6.797694548752703e-06, |
| "loss": 0.0273, |
| "step": 15970 |
| }, |
| { |
| "epoch": 15.321188878235859, |
| "grad_norm": 0.189237579703331, |
| "learning_rate": 6.753950656940905e-06, |
| "loss": 0.0267, |
| "step": 15980 |
| }, |
| { |
| "epoch": 15.330776605944392, |
| "grad_norm": 0.28015297651290894, |
| "learning_rate": 6.710337771272745e-06, |
| "loss": 0.034, |
| "step": 15990 |
| }, |
| { |
| "epoch": 15.340364333652925, |
| "grad_norm": 0.1625533103942871, |
| "learning_rate": 6.666856023865658e-06, |
| "loss": 0.0233, |
| "step": 16000 |
| }, |
| { |
| "epoch": 15.349952061361458, |
| "grad_norm": 0.21412205696105957, |
| "learning_rate": 6.623505546439773e-06, |
| "loss": 0.0253, |
| "step": 16010 |
| }, |
| { |
| "epoch": 15.35953978906999, |
| "grad_norm": 0.26244086027145386, |
| "learning_rate": 6.580286470317598e-06, |
| "loss": 0.0256, |
| "step": 16020 |
| }, |
| { |
| "epoch": 15.369127516778523, |
| "grad_norm": 0.28637972474098206, |
| "learning_rate": 6.537198926423549e-06, |
| "loss": 0.0283, |
| "step": 16030 |
| }, |
| { |
| "epoch": 15.378715244487056, |
| "grad_norm": 0.2678770124912262, |
| "learning_rate": 6.494243045283621e-06, |
| "loss": 0.0271, |
| "step": 16040 |
| }, |
| { |
| "epoch": 15.38830297219559, |
| "grad_norm": 0.1962299942970276, |
| "learning_rate": 6.45141895702493e-06, |
| "loss": 0.0258, |
| "step": 16050 |
| }, |
| { |
| "epoch": 15.397890699904123, |
| "grad_norm": 0.26651138067245483, |
| "learning_rate": 6.40872679137538e-06, |
| "loss": 0.0276, |
| "step": 16060 |
| }, |
| { |
| "epoch": 15.407478427612656, |
| "grad_norm": 0.23737022280693054, |
| "learning_rate": 6.366166677663204e-06, |
| "loss": 0.0309, |
| "step": 16070 |
| }, |
| { |
| "epoch": 15.417066155321189, |
| "grad_norm": 0.2531161606311798, |
| "learning_rate": 6.323738744816654e-06, |
| "loss": 0.0329, |
| "step": 16080 |
| }, |
| { |
| "epoch": 15.426653883029722, |
| "grad_norm": 0.26035356521606445, |
| "learning_rate": 6.2814431213635065e-06, |
| "loss": 0.0286, |
| "step": 16090 |
| }, |
| { |
| "epoch": 15.436241610738255, |
| "grad_norm": 0.2163701057434082, |
| "learning_rate": 6.239279935430786e-06, |
| "loss": 0.027, |
| "step": 16100 |
| }, |
| { |
| "epoch": 15.445829338446789, |
| "grad_norm": 0.18169005215168, |
| "learning_rate": 6.197249314744275e-06, |
| "loss": 0.024, |
| "step": 16110 |
| }, |
| { |
| "epoch": 15.455417066155322, |
| "grad_norm": 0.24503251910209656, |
| "learning_rate": 6.155351386628205e-06, |
| "loss": 0.0298, |
| "step": 16120 |
| }, |
| { |
| "epoch": 15.465004793863855, |
| "grad_norm": 0.19895343482494354, |
| "learning_rate": 6.113586278004835e-06, |
| "loss": 0.0233, |
| "step": 16130 |
| }, |
| { |
| "epoch": 15.474592521572387, |
| "grad_norm": 0.2949654459953308, |
| "learning_rate": 6.071954115394063e-06, |
| "loss": 0.0256, |
| "step": 16140 |
| }, |
| { |
| "epoch": 15.48418024928092, |
| "grad_norm": 0.13835924863815308, |
| "learning_rate": 6.030455024913029e-06, |
| "loss": 0.029, |
| "step": 16150 |
| }, |
| { |
| "epoch": 15.493767976989453, |
| "grad_norm": 0.36957499384880066, |
| "learning_rate": 5.989089132275799e-06, |
| "loss": 0.0369, |
| "step": 16160 |
| }, |
| { |
| "epoch": 15.503355704697986, |
| "grad_norm": 0.22811642289161682, |
| "learning_rate": 5.947856562792925e-06, |
| "loss": 0.0306, |
| "step": 16170 |
| }, |
| { |
| "epoch": 15.51294343240652, |
| "grad_norm": 0.3362506330013275, |
| "learning_rate": 5.906757441371069e-06, |
| "loss": 0.0346, |
| "step": 16180 |
| }, |
| { |
| "epoch": 15.522531160115053, |
| "grad_norm": 0.20575332641601562, |
| "learning_rate": 5.865791892512623e-06, |
| "loss": 0.0305, |
| "step": 16190 |
| }, |
| { |
| "epoch": 15.532118887823586, |
| "grad_norm": 0.1870652139186859, |
| "learning_rate": 5.824960040315386e-06, |
| "loss": 0.0253, |
| "step": 16200 |
| }, |
| { |
| "epoch": 15.541706615532119, |
| "grad_norm": 0.4694177508354187, |
| "learning_rate": 5.784262008472124e-06, |
| "loss": 0.0287, |
| "step": 16210 |
| }, |
| { |
| "epoch": 15.551294343240652, |
| "grad_norm": 0.2506779134273529, |
| "learning_rate": 5.7436979202702194e-06, |
| "loss": 0.0331, |
| "step": 16220 |
| }, |
| { |
| "epoch": 15.560882070949186, |
| "grad_norm": 0.18632706999778748, |
| "learning_rate": 5.703267898591275e-06, |
| "loss": 0.0234, |
| "step": 16230 |
| }, |
| { |
| "epoch": 15.570469798657719, |
| "grad_norm": 0.14531591534614563, |
| "learning_rate": 5.662972065910799e-06, |
| "loss": 0.0245, |
| "step": 16240 |
| }, |
| { |
| "epoch": 15.580057526366252, |
| "grad_norm": 0.19370119273662567, |
| "learning_rate": 5.622810544297796e-06, |
| "loss": 0.0262, |
| "step": 16250 |
| }, |
| { |
| "epoch": 15.589645254074785, |
| "grad_norm": 0.2350122630596161, |
| "learning_rate": 5.582783455414375e-06, |
| "loss": 0.0262, |
| "step": 16260 |
| }, |
| { |
| "epoch": 15.599232981783317, |
| "grad_norm": 0.2912338078022003, |
| "learning_rate": 5.5428909205154035e-06, |
| "loss": 0.0284, |
| "step": 16270 |
| }, |
| { |
| "epoch": 15.60882070949185, |
| "grad_norm": 0.28382018208503723, |
| "learning_rate": 5.503133060448168e-06, |
| "loss": 0.0257, |
| "step": 16280 |
| }, |
| { |
| "epoch": 15.618408437200383, |
| "grad_norm": 0.1536964774131775, |
| "learning_rate": 5.463509995651978e-06, |
| "loss": 0.0274, |
| "step": 16290 |
| }, |
| { |
| "epoch": 15.627996164908916, |
| "grad_norm": 0.5844811201095581, |
| "learning_rate": 5.4240218461577894e-06, |
| "loss": 0.0294, |
| "step": 16300 |
| }, |
| { |
| "epoch": 15.63758389261745, |
| "grad_norm": 0.2484215646982193, |
| "learning_rate": 5.384668731587844e-06, |
| "loss": 0.0278, |
| "step": 16310 |
| }, |
| { |
| "epoch": 15.647171620325983, |
| "grad_norm": 0.2738986015319824, |
| "learning_rate": 5.345450771155358e-06, |
| "loss": 0.0271, |
| "step": 16320 |
| }, |
| { |
| "epoch": 15.656759348034516, |
| "grad_norm": 0.23017966747283936, |
| "learning_rate": 5.3063680836641095e-06, |
| "loss": 0.0261, |
| "step": 16330 |
| }, |
| { |
| "epoch": 15.66634707574305, |
| "grad_norm": 0.1773134022951126, |
| "learning_rate": 5.2674207875080595e-06, |
| "loss": 0.03, |
| "step": 16340 |
| }, |
| { |
| "epoch": 15.675934803451582, |
| "grad_norm": 0.1907745748758316, |
| "learning_rate": 5.228609000671081e-06, |
| "loss": 0.0224, |
| "step": 16350 |
| }, |
| { |
| "epoch": 15.685522531160116, |
| "grad_norm": 0.2307148277759552, |
| "learning_rate": 5.1899328407264855e-06, |
| "loss": 0.0294, |
| "step": 16360 |
| }, |
| { |
| "epoch": 15.695110258868649, |
| "grad_norm": 0.3302120566368103, |
| "learning_rate": 5.151392424836782e-06, |
| "loss": 0.0292, |
| "step": 16370 |
| }, |
| { |
| "epoch": 15.70469798657718, |
| "grad_norm": 0.2139192521572113, |
| "learning_rate": 5.112987869753216e-06, |
| "loss": 0.0296, |
| "step": 16380 |
| }, |
| { |
| "epoch": 15.714285714285714, |
| "grad_norm": 0.16015082597732544, |
| "learning_rate": 5.074719291815522e-06, |
| "loss": 0.029, |
| "step": 16390 |
| }, |
| { |
| "epoch": 15.723873441994247, |
| "grad_norm": 0.19606702029705048, |
| "learning_rate": 5.036586806951465e-06, |
| "loss": 0.029, |
| "step": 16400 |
| }, |
| { |
| "epoch": 15.73346116970278, |
| "grad_norm": 0.30746451020240784, |
| "learning_rate": 4.998590530676584e-06, |
| "loss": 0.0285, |
| "step": 16410 |
| }, |
| { |
| "epoch": 15.743048897411313, |
| "grad_norm": 0.16113652288913727, |
| "learning_rate": 4.960730578093753e-06, |
| "loss": 0.028, |
| "step": 16420 |
| }, |
| { |
| "epoch": 15.752636625119846, |
| "grad_norm": 0.23624086380004883, |
| "learning_rate": 4.923007063892926e-06, |
| "loss": 0.0251, |
| "step": 16430 |
| }, |
| { |
| "epoch": 15.76222435282838, |
| "grad_norm": 0.19934307038784027, |
| "learning_rate": 4.885420102350696e-06, |
| "loss": 0.0238, |
| "step": 16440 |
| }, |
| { |
| "epoch": 15.771812080536913, |
| "grad_norm": 0.2440912276506424, |
| "learning_rate": 4.847969807330038e-06, |
| "loss": 0.0231, |
| "step": 16450 |
| }, |
| { |
| "epoch": 15.781399808245446, |
| "grad_norm": 0.2768200933933258, |
| "learning_rate": 4.810656292279875e-06, |
| "loss": 0.0268, |
| "step": 16460 |
| }, |
| { |
| "epoch": 15.79098753595398, |
| "grad_norm": 0.29489603638648987, |
| "learning_rate": 4.773479670234821e-06, |
| "loss": 0.0358, |
| "step": 16470 |
| }, |
| { |
| "epoch": 15.800575263662513, |
| "grad_norm": 0.26058635115623474, |
| "learning_rate": 4.7364400538147665e-06, |
| "loss": 0.0272, |
| "step": 16480 |
| }, |
| { |
| "epoch": 15.810162991371046, |
| "grad_norm": 0.19268332421779633, |
| "learning_rate": 4.699537555224598e-06, |
| "loss": 0.028, |
| "step": 16490 |
| }, |
| { |
| "epoch": 15.819750719079579, |
| "grad_norm": 0.27744096517562866, |
| "learning_rate": 4.6627722862537915e-06, |
| "loss": 0.0278, |
| "step": 16500 |
| }, |
| { |
| "epoch": 15.82933844678811, |
| "grad_norm": 0.3575479984283447, |
| "learning_rate": 4.626144358276147e-06, |
| "loss": 0.0275, |
| "step": 16510 |
| }, |
| { |
| "epoch": 15.838926174496644, |
| "grad_norm": 0.20007503032684326, |
| "learning_rate": 4.589653882249378e-06, |
| "loss": 0.0309, |
| "step": 16520 |
| }, |
| { |
| "epoch": 15.848513902205177, |
| "grad_norm": 0.20804741978645325, |
| "learning_rate": 4.553300968714841e-06, |
| "loss": 0.0249, |
| "step": 16530 |
| }, |
| { |
| "epoch": 15.85810162991371, |
| "grad_norm": 0.2726737856864929, |
| "learning_rate": 4.5170857277971765e-06, |
| "loss": 0.0259, |
| "step": 16540 |
| }, |
| { |
| "epoch": 15.867689357622243, |
| "grad_norm": 0.21122261881828308, |
| "learning_rate": 4.48100826920394e-06, |
| "loss": 0.029, |
| "step": 16550 |
| }, |
| { |
| "epoch": 15.877277085330777, |
| "grad_norm": 0.28613051772117615, |
| "learning_rate": 4.4450687022253135e-06, |
| "loss": 0.0255, |
| "step": 16560 |
| }, |
| { |
| "epoch": 15.88686481303931, |
| "grad_norm": 0.2184969037771225, |
| "learning_rate": 4.409267135733764e-06, |
| "loss": 0.0233, |
| "step": 16570 |
| }, |
| { |
| "epoch": 15.896452540747843, |
| "grad_norm": 0.19320517778396606, |
| "learning_rate": 4.37360367818373e-06, |
| "loss": 0.0271, |
| "step": 16580 |
| }, |
| { |
| "epoch": 15.906040268456376, |
| "grad_norm": 0.18892447650432587, |
| "learning_rate": 4.338078437611237e-06, |
| "loss": 0.0265, |
| "step": 16590 |
| }, |
| { |
| "epoch": 15.91562799616491, |
| "grad_norm": 0.23824314773082733, |
| "learning_rate": 4.3026915216336225e-06, |
| "loss": 0.0269, |
| "step": 16600 |
| }, |
| { |
| "epoch": 15.925215723873443, |
| "grad_norm": 0.1431523561477661, |
| "learning_rate": 4.267443037449198e-06, |
| "loss": 0.0269, |
| "step": 16610 |
| }, |
| { |
| "epoch": 15.934803451581976, |
| "grad_norm": 0.22107666730880737, |
| "learning_rate": 4.232333091836932e-06, |
| "loss": 0.0293, |
| "step": 16620 |
| }, |
| { |
| "epoch": 15.944391179290509, |
| "grad_norm": 0.27542436122894287, |
| "learning_rate": 4.197361791156096e-06, |
| "loss": 0.03, |
| "step": 16630 |
| }, |
| { |
| "epoch": 15.95397890699904, |
| "grad_norm": 0.234486922621727, |
| "learning_rate": 4.162529241345958e-06, |
| "loss": 0.0325, |
| "step": 16640 |
| }, |
| { |
| "epoch": 15.963566634707574, |
| "grad_norm": 0.24536362290382385, |
| "learning_rate": 4.127835547925479e-06, |
| "loss": 0.0211, |
| "step": 16650 |
| }, |
| { |
| "epoch": 15.973154362416107, |
| "grad_norm": 0.2566201686859131, |
| "learning_rate": 4.093280815992989e-06, |
| "loss": 0.0244, |
| "step": 16660 |
| }, |
| { |
| "epoch": 15.98274209012464, |
| "grad_norm": 0.3387947380542755, |
| "learning_rate": 4.058865150225833e-06, |
| "loss": 0.0279, |
| "step": 16670 |
| }, |
| { |
| "epoch": 15.992329817833173, |
| "grad_norm": 0.5632581114768982, |
| "learning_rate": 4.024588654880079e-06, |
| "loss": 0.0298, |
| "step": 16680 |
| }, |
| { |
| "epoch": 16.001917545541705, |
| "grad_norm": 0.2585551142692566, |
| "learning_rate": 3.990451433790254e-06, |
| "loss": 0.0313, |
| "step": 16690 |
| }, |
| { |
| "epoch": 16.01150527325024, |
| "grad_norm": 0.2654295563697815, |
| "learning_rate": 3.956453590368914e-06, |
| "loss": 0.0258, |
| "step": 16700 |
| }, |
| { |
| "epoch": 16.02109300095877, |
| "grad_norm": 0.243434339761734, |
| "learning_rate": 3.922595227606435e-06, |
| "loss": 0.0263, |
| "step": 16710 |
| }, |
| { |
| "epoch": 16.030680728667306, |
| "grad_norm": 0.23672133684158325, |
| "learning_rate": 3.8888764480706276e-06, |
| "loss": 0.029, |
| "step": 16720 |
| }, |
| { |
| "epoch": 16.040268456375838, |
| "grad_norm": 0.28110471367836, |
| "learning_rate": 3.855297353906512e-06, |
| "loss": 0.0313, |
| "step": 16730 |
| }, |
| { |
| "epoch": 16.049856184084373, |
| "grad_norm": 0.17387288808822632, |
| "learning_rate": 3.821858046835913e-06, |
| "loss": 0.0263, |
| "step": 16740 |
| }, |
| { |
| "epoch": 16.059443911792904, |
| "grad_norm": 0.16623635590076447, |
| "learning_rate": 3.7885586281572016e-06, |
| "loss": 0.0234, |
| "step": 16750 |
| }, |
| { |
| "epoch": 16.06903163950144, |
| "grad_norm": 0.20889221131801605, |
| "learning_rate": 3.7553991987449912e-06, |
| "loss": 0.0198, |
| "step": 16760 |
| }, |
| { |
| "epoch": 16.07861936720997, |
| "grad_norm": 0.2764891982078552, |
| "learning_rate": 3.7223798590498403e-06, |
| "loss": 0.0306, |
| "step": 16770 |
| }, |
| { |
| "epoch": 16.088207094918506, |
| "grad_norm": 0.17139260470867157, |
| "learning_rate": 3.689500709097893e-06, |
| "loss": 0.0204, |
| "step": 16780 |
| }, |
| { |
| "epoch": 16.097794822627037, |
| "grad_norm": 0.25818943977355957, |
| "learning_rate": 3.6567618484906307e-06, |
| "loss": 0.0243, |
| "step": 16790 |
| }, |
| { |
| "epoch": 16.107382550335572, |
| "grad_norm": 0.33521944284439087, |
| "learning_rate": 3.6241633764045545e-06, |
| "loss": 0.0289, |
| "step": 16800 |
| }, |
| { |
| "epoch": 16.116970278044104, |
| "grad_norm": 0.23774349689483643, |
| "learning_rate": 3.591705391590905e-06, |
| "loss": 0.0284, |
| "step": 16810 |
| }, |
| { |
| "epoch": 16.126558005752635, |
| "grad_norm": 0.17396867275238037, |
| "learning_rate": 3.5593879923753015e-06, |
| "loss": 0.0292, |
| "step": 16820 |
| }, |
| { |
| "epoch": 16.13614573346117, |
| "grad_norm": 0.32836684584617615, |
| "learning_rate": 3.5272112766574993e-06, |
| "loss": 0.0261, |
| "step": 16830 |
| }, |
| { |
| "epoch": 16.1457334611697, |
| "grad_norm": 0.2727390229701996, |
| "learning_rate": 3.4951753419110943e-06, |
| "loss": 0.0294, |
| "step": 16840 |
| }, |
| { |
| "epoch": 16.155321188878236, |
| "grad_norm": 0.36386972665786743, |
| "learning_rate": 3.4632802851832013e-06, |
| "loss": 0.0256, |
| "step": 16850 |
| }, |
| { |
| "epoch": 16.164908916586768, |
| "grad_norm": 0.20322419703006744, |
| "learning_rate": 3.431526203094171e-06, |
| "loss": 0.0242, |
| "step": 16860 |
| }, |
| { |
| "epoch": 16.174496644295303, |
| "grad_norm": 0.23579928278923035, |
| "learning_rate": 3.3999131918372785e-06, |
| "loss": 0.03, |
| "step": 16870 |
| }, |
| { |
| "epoch": 16.184084372003834, |
| "grad_norm": 0.20980890095233917, |
| "learning_rate": 3.3684413471784804e-06, |
| "loss": 0.0281, |
| "step": 16880 |
| }, |
| { |
| "epoch": 16.19367209971237, |
| "grad_norm": 0.17388616502285004, |
| "learning_rate": 3.3371107644560805e-06, |
| "loss": 0.0312, |
| "step": 16890 |
| }, |
| { |
| "epoch": 16.2032598274209, |
| "grad_norm": 0.43162086606025696, |
| "learning_rate": 3.3059215385804585e-06, |
| "loss": 0.0281, |
| "step": 16900 |
| }, |
| { |
| "epoch": 16.212847555129436, |
| "grad_norm": 0.21873044967651367, |
| "learning_rate": 3.274873764033759e-06, |
| "loss": 0.0255, |
| "step": 16910 |
| }, |
| { |
| "epoch": 16.222435282837967, |
| "grad_norm": 0.2102050930261612, |
| "learning_rate": 3.243967534869652e-06, |
| "loss": 0.0272, |
| "step": 16920 |
| }, |
| { |
| "epoch": 16.232023010546502, |
| "grad_norm": 0.21298690140247345, |
| "learning_rate": 3.213202944713023e-06, |
| "loss": 0.0261, |
| "step": 16930 |
| }, |
| { |
| "epoch": 16.241610738255034, |
| "grad_norm": 0.30388498306274414, |
| "learning_rate": 3.1825800867596566e-06, |
| "loss": 0.0338, |
| "step": 16940 |
| }, |
| { |
| "epoch": 16.251198465963565, |
| "grad_norm": 0.2536049485206604, |
| "learning_rate": 3.152099053776014e-06, |
| "loss": 0.0292, |
| "step": 16950 |
| }, |
| { |
| "epoch": 16.2607861936721, |
| "grad_norm": 0.2809562385082245, |
| "learning_rate": 3.121759938098906e-06, |
| "loss": 0.0262, |
| "step": 16960 |
| }, |
| { |
| "epoch": 16.27037392138063, |
| "grad_norm": 0.2241629660129547, |
| "learning_rate": 3.091562831635253e-06, |
| "loss": 0.0288, |
| "step": 16970 |
| }, |
| { |
| "epoch": 16.279961649089167, |
| "grad_norm": 0.1237056627869606, |
| "learning_rate": 3.061507825861748e-06, |
| "loss": 0.0209, |
| "step": 16980 |
| }, |
| { |
| "epoch": 16.289549376797698, |
| "grad_norm": 0.13440051674842834, |
| "learning_rate": 3.031595011824656e-06, |
| "loss": 0.0273, |
| "step": 16990 |
| }, |
| { |
| "epoch": 16.299137104506233, |
| "grad_norm": 0.28445371985435486, |
| "learning_rate": 3.0018244801394535e-06, |
| "loss": 0.034, |
| "step": 17000 |
| }, |
| { |
| "epoch": 16.308724832214764, |
| "grad_norm": 0.3177470862865448, |
| "learning_rate": 2.9721963209906502e-06, |
| "loss": 0.0301, |
| "step": 17010 |
| }, |
| { |
| "epoch": 16.3183125599233, |
| "grad_norm": 0.1341092437505722, |
| "learning_rate": 2.942710624131412e-06, |
| "loss": 0.0266, |
| "step": 17020 |
| }, |
| { |
| "epoch": 16.32790028763183, |
| "grad_norm": 0.19116052985191345, |
| "learning_rate": 2.9133674788833833e-06, |
| "loss": 0.0311, |
| "step": 17030 |
| }, |
| { |
| "epoch": 16.337488015340366, |
| "grad_norm": 0.1874174177646637, |
| "learning_rate": 2.884166974136343e-06, |
| "loss": 0.0236, |
| "step": 17040 |
| }, |
| { |
| "epoch": 16.347075743048897, |
| "grad_norm": 0.36720889806747437, |
| "learning_rate": 2.855109198347983e-06, |
| "loss": 0.0278, |
| "step": 17050 |
| }, |
| { |
| "epoch": 16.35666347075743, |
| "grad_norm": 0.38599368929862976, |
| "learning_rate": 2.826194239543617e-06, |
| "loss": 0.0323, |
| "step": 17060 |
| }, |
| { |
| "epoch": 16.366251198465964, |
| "grad_norm": 0.19532305002212524, |
| "learning_rate": 2.797422185315929e-06, |
| "loss": 0.0222, |
| "step": 17070 |
| }, |
| { |
| "epoch": 16.375838926174495, |
| "grad_norm": 0.2218206375837326, |
| "learning_rate": 2.768793122824681e-06, |
| "loss": 0.0255, |
| "step": 17080 |
| }, |
| { |
| "epoch": 16.38542665388303, |
| "grad_norm": 0.3124590516090393, |
| "learning_rate": 2.740307138796483e-06, |
| "loss": 0.0249, |
| "step": 17090 |
| }, |
| { |
| "epoch": 16.39501438159156, |
| "grad_norm": 0.21726781129837036, |
| "learning_rate": 2.7119643195245238e-06, |
| "loss": 0.0218, |
| "step": 17100 |
| }, |
| { |
| "epoch": 16.404602109300097, |
| "grad_norm": 0.5927583575248718, |
| "learning_rate": 2.683764750868273e-06, |
| "loss": 0.0263, |
| "step": 17110 |
| }, |
| { |
| "epoch": 16.414189837008628, |
| "grad_norm": 0.28960007429122925, |
| "learning_rate": 2.6557085182532582e-06, |
| "loss": 0.0291, |
| "step": 17120 |
| }, |
| { |
| "epoch": 16.423777564717163, |
| "grad_norm": 0.35697048902511597, |
| "learning_rate": 2.6277957066708047e-06, |
| "loss": 0.0273, |
| "step": 17130 |
| }, |
| { |
| "epoch": 16.433365292425695, |
| "grad_norm": 0.2136591225862503, |
| "learning_rate": 2.6000264006777743e-06, |
| "loss": 0.0325, |
| "step": 17140 |
| }, |
| { |
| "epoch": 16.44295302013423, |
| "grad_norm": 0.3051040768623352, |
| "learning_rate": 2.5724006843962866e-06, |
| "loss": 0.0298, |
| "step": 17150 |
| }, |
| { |
| "epoch": 16.45254074784276, |
| "grad_norm": 0.1534937173128128, |
| "learning_rate": 2.5449186415134885e-06, |
| "loss": 0.0263, |
| "step": 17160 |
| }, |
| { |
| "epoch": 16.462128475551296, |
| "grad_norm": 0.17988426983356476, |
| "learning_rate": 2.5175803552812906e-06, |
| "loss": 0.0278, |
| "step": 17170 |
| }, |
| { |
| "epoch": 16.471716203259827, |
| "grad_norm": 0.48748767375946045, |
| "learning_rate": 2.490385908516141e-06, |
| "loss": 0.0308, |
| "step": 17180 |
| }, |
| { |
| "epoch": 16.48130393096836, |
| "grad_norm": 0.191914901137352, |
| "learning_rate": 2.463335383598725e-06, |
| "loss": 0.0303, |
| "step": 17190 |
| }, |
| { |
| "epoch": 16.490891658676894, |
| "grad_norm": 0.21671634912490845, |
| "learning_rate": 2.4364288624737442e-06, |
| "loss": 0.0276, |
| "step": 17200 |
| }, |
| { |
| "epoch": 16.500479386385425, |
| "grad_norm": 0.13923166692256927, |
| "learning_rate": 2.4096664266496814e-06, |
| "loss": 0.0331, |
| "step": 17210 |
| }, |
| { |
| "epoch": 16.51006711409396, |
| "grad_norm": 0.20780488848686218, |
| "learning_rate": 2.3830481571985365e-06, |
| "loss": 0.0243, |
| "step": 17220 |
| }, |
| { |
| "epoch": 16.51965484180249, |
| "grad_norm": 0.39643654227256775, |
| "learning_rate": 2.3565741347555792e-06, |
| "loss": 0.0289, |
| "step": 17230 |
| }, |
| { |
| "epoch": 16.529242569511027, |
| "grad_norm": 0.18083330988883972, |
| "learning_rate": 2.3302444395190915e-06, |
| "loss": 0.0216, |
| "step": 17240 |
| }, |
| { |
| "epoch": 16.538830297219558, |
| "grad_norm": 0.1432444006204605, |
| "learning_rate": 2.3040591512501765e-06, |
| "loss": 0.0318, |
| "step": 17250 |
| }, |
| { |
| "epoch": 16.548418024928093, |
| "grad_norm": 0.2874661386013031, |
| "learning_rate": 2.278018349272465e-06, |
| "loss": 0.0279, |
| "step": 17260 |
| }, |
| { |
| "epoch": 16.558005752636625, |
| "grad_norm": 0.2093266099691391, |
| "learning_rate": 2.2521221124718826e-06, |
| "loss": 0.0226, |
| "step": 17270 |
| }, |
| { |
| "epoch": 16.56759348034516, |
| "grad_norm": 0.3234308063983917, |
| "learning_rate": 2.2263705192964334e-06, |
| "loss": 0.0295, |
| "step": 17280 |
| }, |
| { |
| "epoch": 16.57718120805369, |
| "grad_norm": 0.6225463151931763, |
| "learning_rate": 2.2007636477559436e-06, |
| "loss": 0.031, |
| "step": 17290 |
| }, |
| { |
| "epoch": 16.586768935762223, |
| "grad_norm": 0.31777986884117126, |
| "learning_rate": 2.1753015754218453e-06, |
| "loss": 0.0311, |
| "step": 17300 |
| }, |
| { |
| "epoch": 16.596356663470758, |
| "grad_norm": 0.2332683950662613, |
| "learning_rate": 2.149984379426906e-06, |
| "loss": 0.0263, |
| "step": 17310 |
| }, |
| { |
| "epoch": 16.60594439117929, |
| "grad_norm": 0.23592767119407654, |
| "learning_rate": 2.1248121364650265e-06, |
| "loss": 0.0229, |
| "step": 17320 |
| }, |
| { |
| "epoch": 16.615532118887824, |
| "grad_norm": 0.4014437198638916, |
| "learning_rate": 2.0997849227909983e-06, |
| "loss": 0.026, |
| "step": 17330 |
| }, |
| { |
| "epoch": 16.625119846596355, |
| "grad_norm": 0.18571177124977112, |
| "learning_rate": 2.0749028142202807e-06, |
| "loss": 0.0281, |
| "step": 17340 |
| }, |
| { |
| "epoch": 16.63470757430489, |
| "grad_norm": 0.2480279952287674, |
| "learning_rate": 2.050165886128741e-06, |
| "loss": 0.0283, |
| "step": 17350 |
| }, |
| { |
| "epoch": 16.644295302013422, |
| "grad_norm": 0.20139874517917633, |
| "learning_rate": 2.0255742134524804e-06, |
| "loss": 0.0263, |
| "step": 17360 |
| }, |
| { |
| "epoch": 16.653883029721957, |
| "grad_norm": 0.18241684138774872, |
| "learning_rate": 2.001127870687541e-06, |
| "loss": 0.0206, |
| "step": 17370 |
| }, |
| { |
| "epoch": 16.66347075743049, |
| "grad_norm": 0.26072490215301514, |
| "learning_rate": 1.9768269318897414e-06, |
| "loss": 0.0251, |
| "step": 17380 |
| }, |
| { |
| "epoch": 16.673058485139023, |
| "grad_norm": 0.33512383699417114, |
| "learning_rate": 1.9526714706744055e-06, |
| "loss": 0.0282, |
| "step": 17390 |
| }, |
| { |
| "epoch": 16.682646212847555, |
| "grad_norm": 0.279745876789093, |
| "learning_rate": 1.928661560216172e-06, |
| "loss": 0.0233, |
| "step": 17400 |
| }, |
| { |
| "epoch": 16.69223394055609, |
| "grad_norm": 0.2306470274925232, |
| "learning_rate": 1.904797273248754e-06, |
| "loss": 0.0272, |
| "step": 17410 |
| }, |
| { |
| "epoch": 16.70182166826462, |
| "grad_norm": 0.14322997629642487, |
| "learning_rate": 1.8810786820647242e-06, |
| "loss": 0.0272, |
| "step": 17420 |
| }, |
| { |
| "epoch": 16.711409395973153, |
| "grad_norm": 0.25938233733177185, |
| "learning_rate": 1.8575058585152905e-06, |
| "loss": 0.0308, |
| "step": 17430 |
| }, |
| { |
| "epoch": 16.720997123681688, |
| "grad_norm": 0.23380053043365479, |
| "learning_rate": 1.8340788740101034e-06, |
| "loss": 0.028, |
| "step": 17440 |
| }, |
| { |
| "epoch": 16.73058485139022, |
| "grad_norm": 0.27241095900535583, |
| "learning_rate": 1.810797799517e-06, |
| "loss": 0.0293, |
| "step": 17450 |
| }, |
| { |
| "epoch": 16.740172579098754, |
| "grad_norm": 0.24621997773647308, |
| "learning_rate": 1.7876627055618155e-06, |
| "loss": 0.0258, |
| "step": 17460 |
| }, |
| { |
| "epoch": 16.749760306807286, |
| "grad_norm": 0.15812641382217407, |
| "learning_rate": 1.7646736622281667e-06, |
| "loss": 0.0259, |
| "step": 17470 |
| }, |
| { |
| "epoch": 16.75934803451582, |
| "grad_norm": 0.18936626613140106, |
| "learning_rate": 1.7418307391572354e-06, |
| "loss": 0.026, |
| "step": 17480 |
| }, |
| { |
| "epoch": 16.768935762224352, |
| "grad_norm": 0.16878223419189453, |
| "learning_rate": 1.7191340055475513e-06, |
| "loss": 0.0281, |
| "step": 17490 |
| }, |
| { |
| "epoch": 16.778523489932887, |
| "grad_norm": 0.18892349302768707, |
| "learning_rate": 1.696583530154794e-06, |
| "loss": 0.0259, |
| "step": 17500 |
| }, |
| { |
| "epoch": 16.78811121764142, |
| "grad_norm": 0.243266299366951, |
| "learning_rate": 1.6741793812915907e-06, |
| "loss": 0.0248, |
| "step": 17510 |
| }, |
| { |
| "epoch": 16.797698945349953, |
| "grad_norm": 0.20740211009979248, |
| "learning_rate": 1.6519216268272796e-06, |
| "loss": 0.0264, |
| "step": 17520 |
| }, |
| { |
| "epoch": 16.807286673058485, |
| "grad_norm": 0.16220887005329132, |
| "learning_rate": 1.6298103341877369e-06, |
| "loss": 0.0226, |
| "step": 17530 |
| }, |
| { |
| "epoch": 16.81687440076702, |
| "grad_norm": 0.3126187026500702, |
| "learning_rate": 1.6078455703551486e-06, |
| "loss": 0.0326, |
| "step": 17540 |
| }, |
| { |
| "epoch": 16.82646212847555, |
| "grad_norm": 0.1612725555896759, |
| "learning_rate": 1.5860274018678345e-06, |
| "loss": 0.0327, |
| "step": 17550 |
| }, |
| { |
| "epoch": 16.836049856184083, |
| "grad_norm": 0.20316867530345917, |
| "learning_rate": 1.5643558948200131e-06, |
| "loss": 0.0252, |
| "step": 17560 |
| }, |
| { |
| "epoch": 16.845637583892618, |
| "grad_norm": 0.20207004249095917, |
| "learning_rate": 1.5428311148616204e-06, |
| "loss": 0.0298, |
| "step": 17570 |
| }, |
| { |
| "epoch": 16.85522531160115, |
| "grad_norm": 0.2780834436416626, |
| "learning_rate": 1.5214531271981192e-06, |
| "loss": 0.026, |
| "step": 17580 |
| }, |
| { |
| "epoch": 16.864813039309684, |
| "grad_norm": 0.3551330268383026, |
| "learning_rate": 1.5002219965902896e-06, |
| "loss": 0.0255, |
| "step": 17590 |
| }, |
| { |
| "epoch": 16.874400767018216, |
| "grad_norm": 0.23651057481765747, |
| "learning_rate": 1.4791377873540235e-06, |
| "loss": 0.0274, |
| "step": 17600 |
| }, |
| { |
| "epoch": 16.88398849472675, |
| "grad_norm": 0.19430945813655853, |
| "learning_rate": 1.4582005633601515e-06, |
| "loss": 0.0232, |
| "step": 17610 |
| }, |
| { |
| "epoch": 16.893576222435282, |
| "grad_norm": 0.21821914613246918, |
| "learning_rate": 1.437410388034227e-06, |
| "loss": 0.0278, |
| "step": 17620 |
| }, |
| { |
| "epoch": 16.903163950143817, |
| "grad_norm": 0.23415020108222961, |
| "learning_rate": 1.4167673243563717e-06, |
| "loss": 0.0331, |
| "step": 17630 |
| }, |
| { |
| "epoch": 16.91275167785235, |
| "grad_norm": 0.207551971077919, |
| "learning_rate": 1.3962714348610295e-06, |
| "loss": 0.0305, |
| "step": 17640 |
| }, |
| { |
| "epoch": 16.922339405560884, |
| "grad_norm": 0.28280988335609436, |
| "learning_rate": 1.3759227816368182e-06, |
| "loss": 0.0297, |
| "step": 17650 |
| }, |
| { |
| "epoch": 16.931927133269415, |
| "grad_norm": 0.24366876482963562, |
| "learning_rate": 1.3557214263263286e-06, |
| "loss": 0.0247, |
| "step": 17660 |
| }, |
| { |
| "epoch": 16.941514860977946, |
| "grad_norm": 0.20423495769500732, |
| "learning_rate": 1.3356674301259532e-06, |
| "loss": 0.0263, |
| "step": 17670 |
| }, |
| { |
| "epoch": 16.95110258868648, |
| "grad_norm": 0.19706788659095764, |
| "learning_rate": 1.3157608537856582e-06, |
| "loss": 0.0297, |
| "step": 17680 |
| }, |
| { |
| "epoch": 16.960690316395013, |
| "grad_norm": 0.2174736112356186, |
| "learning_rate": 1.2960017576088446e-06, |
| "loss": 0.0278, |
| "step": 17690 |
| }, |
| { |
| "epoch": 16.970278044103548, |
| "grad_norm": 0.2222086638212204, |
| "learning_rate": 1.2763902014521656e-06, |
| "loss": 0.0276, |
| "step": 17700 |
| }, |
| { |
| "epoch": 16.97986577181208, |
| "grad_norm": 0.20257794857025146, |
| "learning_rate": 1.2569262447252928e-06, |
| "loss": 0.034, |
| "step": 17710 |
| }, |
| { |
| "epoch": 16.989453499520614, |
| "grad_norm": 0.2699783146381378, |
| "learning_rate": 1.2376099463907887e-06, |
| "loss": 0.0226, |
| "step": 17720 |
| }, |
| { |
| "epoch": 16.999041227229146, |
| "grad_norm": 0.19566196203231812, |
| "learning_rate": 1.2184413649639182e-06, |
| "loss": 0.028, |
| "step": 17730 |
| }, |
| { |
| "epoch": 17.00862895493768, |
| "grad_norm": 0.23381511867046356, |
| "learning_rate": 1.1994205585124652e-06, |
| "loss": 0.029, |
| "step": 17740 |
| }, |
| { |
| "epoch": 17.018216682646212, |
| "grad_norm": 0.19119040668010712, |
| "learning_rate": 1.180547584656533e-06, |
| "loss": 0.0239, |
| "step": 17750 |
| }, |
| { |
| "epoch": 17.027804410354747, |
| "grad_norm": 0.23085108399391174, |
| "learning_rate": 1.1618225005684158e-06, |
| "loss": 0.0275, |
| "step": 17760 |
| }, |
| { |
| "epoch": 17.03739213806328, |
| "grad_norm": 0.21077860891819, |
| "learning_rate": 1.1432453629723893e-06, |
| "loss": 0.0309, |
| "step": 17770 |
| }, |
| { |
| "epoch": 17.046979865771814, |
| "grad_norm": 0.18925194442272186, |
| "learning_rate": 1.124816228144565e-06, |
| "loss": 0.0271, |
| "step": 17780 |
| }, |
| { |
| "epoch": 17.056567593480345, |
| "grad_norm": 0.22407986223697662, |
| "learning_rate": 1.106535151912702e-06, |
| "loss": 0.0273, |
| "step": 17790 |
| }, |
| { |
| "epoch": 17.066155321188877, |
| "grad_norm": 0.21448639035224915, |
| "learning_rate": 1.0884021896560237e-06, |
| "loss": 0.0258, |
| "step": 17800 |
| }, |
| { |
| "epoch": 17.07574304889741, |
| "grad_norm": 0.24161478877067566, |
| "learning_rate": 1.0704173963050957e-06, |
| "loss": 0.0289, |
| "step": 17810 |
| }, |
| { |
| "epoch": 17.085330776605943, |
| "grad_norm": 0.1643606573343277, |
| "learning_rate": 1.0525808263416205e-06, |
| "loss": 0.0258, |
| "step": 17820 |
| }, |
| { |
| "epoch": 17.094918504314478, |
| "grad_norm": 0.2575829327106476, |
| "learning_rate": 1.0348925337982817e-06, |
| "loss": 0.0274, |
| "step": 17830 |
| }, |
| { |
| "epoch": 17.10450623202301, |
| "grad_norm": 0.1602732241153717, |
| "learning_rate": 1.0173525722585897e-06, |
| "loss": 0.0358, |
| "step": 17840 |
| }, |
| { |
| "epoch": 17.114093959731544, |
| "grad_norm": 0.23271816968917847, |
| "learning_rate": 9.999609948567024e-07, |
| "loss": 0.0373, |
| "step": 17850 |
| }, |
| { |
| "epoch": 17.123681687440076, |
| "grad_norm": 0.18822619318962097, |
| "learning_rate": 9.82717854277293e-07, |
| "loss": 0.0278, |
| "step": 17860 |
| }, |
| { |
| "epoch": 17.13326941514861, |
| "grad_norm": 0.37295079231262207, |
| "learning_rate": 9.656232027553558e-07, |
| "loss": 0.0245, |
| "step": 17870 |
| }, |
| { |
| "epoch": 17.142857142857142, |
| "grad_norm": 0.207114115357399, |
| "learning_rate": 9.486770920760668e-07, |
| "loss": 0.0237, |
| "step": 17880 |
| }, |
| { |
| "epoch": 17.152444870565677, |
| "grad_norm": 0.2382437288761139, |
| "learning_rate": 9.318795735746233e-07, |
| "loss": 0.0262, |
| "step": 17890 |
| }, |
| { |
| "epoch": 17.16203259827421, |
| "grad_norm": 0.3437121510505676, |
| "learning_rate": 9.152306981360992e-07, |
| "loss": 0.0274, |
| "step": 17900 |
| }, |
| { |
| "epoch": 17.171620325982744, |
| "grad_norm": 0.1845656931400299, |
| "learning_rate": 8.987305161952731e-07, |
| "loss": 0.0251, |
| "step": 17910 |
| }, |
| { |
| "epoch": 17.181208053691275, |
| "grad_norm": 0.2611910402774811, |
| "learning_rate": 8.823790777364837e-07, |
| "loss": 0.0263, |
| "step": 17920 |
| }, |
| { |
| "epoch": 17.190795781399807, |
| "grad_norm": 0.3325332701206207, |
| "learning_rate": 8.661764322934695e-07, |
| "loss": 0.0314, |
| "step": 17930 |
| }, |
| { |
| "epoch": 17.20038350910834, |
| "grad_norm": 0.38311854004859924, |
| "learning_rate": 8.50122628949257e-07, |
| "loss": 0.0279, |
| "step": 17940 |
| }, |
| { |
| "epoch": 17.209971236816873, |
| "grad_norm": 0.1343742161989212, |
| "learning_rate": 8.342177163359389e-07, |
| "loss": 0.028, |
| "step": 17950 |
| }, |
| { |
| "epoch": 17.219558964525408, |
| "grad_norm": 0.19379399716854095, |
| "learning_rate": 8.184617426346131e-07, |
| "loss": 0.0301, |
| "step": 17960 |
| }, |
| { |
| "epoch": 17.22914669223394, |
| "grad_norm": 0.16689153015613556, |
| "learning_rate": 8.028547555751553e-07, |
| "loss": 0.029, |
| "step": 17970 |
| }, |
| { |
| "epoch": 17.238734419942475, |
| "grad_norm": 0.45647260546684265, |
| "learning_rate": 7.873968024361467e-07, |
| "loss": 0.0307, |
| "step": 17980 |
| }, |
| { |
| "epoch": 17.248322147651006, |
| "grad_norm": 0.19029688835144043, |
| "learning_rate": 7.720879300446682e-07, |
| "loss": 0.0269, |
| "step": 17990 |
| }, |
| { |
| "epoch": 17.25790987535954, |
| "grad_norm": 0.26700901985168457, |
| "learning_rate": 7.569281847762122e-07, |
| "loss": 0.026, |
| "step": 18000 |
| }, |
| { |
| "epoch": 17.267497603068072, |
| "grad_norm": 0.20858362317085266, |
| "learning_rate": 7.419176125544991e-07, |
| "loss": 0.0304, |
| "step": 18010 |
| }, |
| { |
| "epoch": 17.277085330776607, |
| "grad_norm": 0.23115743696689606, |
| "learning_rate": 7.270562588513663e-07, |
| "loss": 0.0389, |
| "step": 18020 |
| }, |
| { |
| "epoch": 17.28667305848514, |
| "grad_norm": 0.17492881417274475, |
| "learning_rate": 7.123441686866183e-07, |
| "loss": 0.0293, |
| "step": 18030 |
| }, |
| { |
| "epoch": 17.29626078619367, |
| "grad_norm": 0.12759244441986084, |
| "learning_rate": 6.977813866278826e-07, |
| "loss": 0.0239, |
| "step": 18040 |
| }, |
| { |
| "epoch": 17.305848513902205, |
| "grad_norm": 0.18989066779613495, |
| "learning_rate": 6.833679567905038e-07, |
| "loss": 0.0292, |
| "step": 18050 |
| }, |
| { |
| "epoch": 17.315436241610737, |
| "grad_norm": 0.5339308977127075, |
| "learning_rate": 6.691039228373774e-07, |
| "loss": 0.0337, |
| "step": 18060 |
| }, |
| { |
| "epoch": 17.325023969319272, |
| "grad_norm": 0.18861901760101318, |
| "learning_rate": 6.549893279788277e-07, |
| "loss": 0.0288, |
| "step": 18070 |
| }, |
| { |
| "epoch": 17.334611697027803, |
| "grad_norm": 0.18615840375423431, |
| "learning_rate": 6.410242149724966e-07, |
| "loss": 0.0246, |
| "step": 18080 |
| }, |
| { |
| "epoch": 17.34419942473634, |
| "grad_norm": 0.1773938536643982, |
| "learning_rate": 6.272086261231769e-07, |
| "loss": 0.0272, |
| "step": 18090 |
| }, |
| { |
| "epoch": 17.35378715244487, |
| "grad_norm": 0.2144092619419098, |
| "learning_rate": 6.135426032827185e-07, |
| "loss": 0.0299, |
| "step": 18100 |
| }, |
| { |
| "epoch": 17.363374880153405, |
| "grad_norm": 0.18490025401115417, |
| "learning_rate": 6.000261878498947e-07, |
| "loss": 0.0297, |
| "step": 18110 |
| }, |
| { |
| "epoch": 17.372962607861936, |
| "grad_norm": 0.18837903439998627, |
| "learning_rate": 5.86659420770247e-07, |
| "loss": 0.0272, |
| "step": 18120 |
| }, |
| { |
| "epoch": 17.38255033557047, |
| "grad_norm": 0.2982289791107178, |
| "learning_rate": 5.734423425359958e-07, |
| "loss": 0.0314, |
| "step": 18130 |
| }, |
| { |
| "epoch": 17.392138063279003, |
| "grad_norm": 0.2356351912021637, |
| "learning_rate": 5.603749931859137e-07, |
| "loss": 0.0258, |
| "step": 18140 |
| }, |
| { |
| "epoch": 17.401725790987538, |
| "grad_norm": 0.13853472471237183, |
| "learning_rate": 5.474574123051912e-07, |
| "loss": 0.0289, |
| "step": 18150 |
| }, |
| { |
| "epoch": 17.41131351869607, |
| "grad_norm": 0.2044096440076828, |
| "learning_rate": 5.346896390253153e-07, |
| "loss": 0.0244, |
| "step": 18160 |
| }, |
| { |
| "epoch": 17.4209012464046, |
| "grad_norm": 0.33529403805732727, |
| "learning_rate": 5.220717120239693e-07, |
| "loss": 0.0282, |
| "step": 18170 |
| }, |
| { |
| "epoch": 17.430488974113135, |
| "grad_norm": 0.2302224040031433, |
| "learning_rate": 5.096036695248885e-07, |
| "loss": 0.0299, |
| "step": 18180 |
| }, |
| { |
| "epoch": 17.440076701821667, |
| "grad_norm": 0.22276417911052704, |
| "learning_rate": 4.972855492977823e-07, |
| "loss": 0.0294, |
| "step": 18190 |
| }, |
| { |
| "epoch": 17.449664429530202, |
| "grad_norm": 0.5279762744903564, |
| "learning_rate": 4.851173886581794e-07, |
| "loss": 0.0286, |
| "step": 18200 |
| }, |
| { |
| "epoch": 17.459252157238733, |
| "grad_norm": 0.22499582171440125, |
| "learning_rate": 4.7309922446732715e-07, |
| "loss": 0.0239, |
| "step": 18210 |
| }, |
| { |
| "epoch": 17.46883988494727, |
| "grad_norm": 0.2594180703163147, |
| "learning_rate": 4.61231093132114e-07, |
| "loss": 0.0275, |
| "step": 18220 |
| }, |
| { |
| "epoch": 17.4784276126558, |
| "grad_norm": 0.1713213175535202, |
| "learning_rate": 4.495130306049034e-07, |
| "loss": 0.0243, |
| "step": 18230 |
| }, |
| { |
| "epoch": 17.488015340364335, |
| "grad_norm": 0.3286925256252289, |
| "learning_rate": 4.3794507238347214e-07, |
| "loss": 0.0316, |
| "step": 18240 |
| }, |
| { |
| "epoch": 17.497603068072866, |
| "grad_norm": 0.23200523853302002, |
| "learning_rate": 4.2652725351085556e-07, |
| "loss": 0.0265, |
| "step": 18250 |
| }, |
| { |
| "epoch": 17.5071907957814, |
| "grad_norm": 0.22095492482185364, |
| "learning_rate": 4.1525960857530243e-07, |
| "loss": 0.024, |
| "step": 18260 |
| }, |
| { |
| "epoch": 17.516778523489933, |
| "grad_norm": 0.17762340605258942, |
| "learning_rate": 4.041421717101146e-07, |
| "loss": 0.0268, |
| "step": 18270 |
| }, |
| { |
| "epoch": 17.526366251198468, |
| "grad_norm": 0.2298087775707245, |
| "learning_rate": 3.931749765935744e-07, |
| "loss": 0.0257, |
| "step": 18280 |
| }, |
| { |
| "epoch": 17.535953978907, |
| "grad_norm": 0.21401867270469666, |
| "learning_rate": 3.8235805644882273e-07, |
| "loss": 0.0245, |
| "step": 18290 |
| }, |
| { |
| "epoch": 17.54554170661553, |
| "grad_norm": 0.5458080172538757, |
| "learning_rate": 3.716914440437813e-07, |
| "loss": 0.033, |
| "step": 18300 |
| }, |
| { |
| "epoch": 17.555129434324066, |
| "grad_norm": 0.17889949679374695, |
| "learning_rate": 3.611751716910472e-07, |
| "loss": 0.0303, |
| "step": 18310 |
| }, |
| { |
| "epoch": 17.564717162032597, |
| "grad_norm": 0.0861106589436531, |
| "learning_rate": 3.508092712477651e-07, |
| "loss": 0.025, |
| "step": 18320 |
| }, |
| { |
| "epoch": 17.574304889741132, |
| "grad_norm": 0.396636962890625, |
| "learning_rate": 3.405937741155829e-07, |
| "loss": 0.03, |
| "step": 18330 |
| }, |
| { |
| "epoch": 17.583892617449663, |
| "grad_norm": 0.3980105221271515, |
| "learning_rate": 3.30528711240502e-07, |
| "loss": 0.0217, |
| "step": 18340 |
| }, |
| { |
| "epoch": 17.5934803451582, |
| "grad_norm": 0.2600933313369751, |
| "learning_rate": 3.206141131128326e-07, |
| "loss": 0.0278, |
| "step": 18350 |
| }, |
| { |
| "epoch": 17.60306807286673, |
| "grad_norm": 0.20506466925144196, |
| "learning_rate": 3.108500097670719e-07, |
| "loss": 0.0216, |
| "step": 18360 |
| }, |
| { |
| "epoch": 17.612655800575265, |
| "grad_norm": 0.31107306480407715, |
| "learning_rate": 3.0123643078180943e-07, |
| "loss": 0.0296, |
| "step": 18370 |
| }, |
| { |
| "epoch": 17.622243528283796, |
| "grad_norm": 0.2587839663028717, |
| "learning_rate": 2.9177340527966613e-07, |
| "loss": 0.0265, |
| "step": 18380 |
| }, |
| { |
| "epoch": 17.63183125599233, |
| "grad_norm": 0.293157160282135, |
| "learning_rate": 2.824609619271723e-07, |
| "loss": 0.0239, |
| "step": 18390 |
| }, |
| { |
| "epoch": 17.641418983700863, |
| "grad_norm": 0.22268742322921753, |
| "learning_rate": 2.732991289347064e-07, |
| "loss": 0.0283, |
| "step": 18400 |
| }, |
| { |
| "epoch": 17.651006711409394, |
| "grad_norm": 0.21071119606494904, |
| "learning_rate": 2.6428793405640087e-07, |
| "loss": 0.0241, |
| "step": 18410 |
| }, |
| { |
| "epoch": 17.66059443911793, |
| "grad_norm": 0.25878384709358215, |
| "learning_rate": 2.554274045900418e-07, |
| "loss": 0.0224, |
| "step": 18420 |
| }, |
| { |
| "epoch": 17.67018216682646, |
| "grad_norm": 0.2513992488384247, |
| "learning_rate": 2.46717567377025e-07, |
| "loss": 0.0271, |
| "step": 18430 |
| }, |
| { |
| "epoch": 17.679769894534996, |
| "grad_norm": 0.1096489429473877, |
| "learning_rate": 2.381584488022337e-07, |
| "loss": 0.0233, |
| "step": 18440 |
| }, |
| { |
| "epoch": 17.689357622243527, |
| "grad_norm": 0.24723054468631744, |
| "learning_rate": 2.2975007479397738e-07, |
| "loss": 0.0254, |
| "step": 18450 |
| }, |
| { |
| "epoch": 17.698945349952062, |
| "grad_norm": 0.22072063386440277, |
| "learning_rate": 2.2149247082392522e-07, |
| "loss": 0.0273, |
| "step": 18460 |
| }, |
| { |
| "epoch": 17.708533077660594, |
| "grad_norm": 0.2557280957698822, |
| "learning_rate": 2.1338566190699517e-07, |
| "loss": 0.0322, |
| "step": 18470 |
| }, |
| { |
| "epoch": 17.71812080536913, |
| "grad_norm": 0.3068563938140869, |
| "learning_rate": 2.0542967260131497e-07, |
| "loss": 0.0211, |
| "step": 18480 |
| }, |
| { |
| "epoch": 17.72770853307766, |
| "grad_norm": 0.18864025175571442, |
| "learning_rate": 1.976245270081334e-07, |
| "loss": 0.028, |
| "step": 18490 |
| }, |
| { |
| "epoch": 17.737296260786195, |
| "grad_norm": 0.20000196993350983, |
| "learning_rate": 1.899702487717203e-07, |
| "loss": 0.0239, |
| "step": 18500 |
| }, |
| { |
| "epoch": 17.746883988494726, |
| "grad_norm": 0.5573348999023438, |
| "learning_rate": 1.8246686107935562e-07, |
| "loss": 0.03, |
| "step": 18510 |
| }, |
| { |
| "epoch": 17.75647171620326, |
| "grad_norm": 0.09101556986570358, |
| "learning_rate": 1.7511438666119594e-07, |
| "loss": 0.0336, |
| "step": 18520 |
| }, |
| { |
| "epoch": 17.766059443911793, |
| "grad_norm": 0.2559066712856293, |
| "learning_rate": 1.6791284779024696e-07, |
| "loss": 0.0285, |
| "step": 18530 |
| }, |
| { |
| "epoch": 17.775647171620324, |
| "grad_norm": 0.23298071324825287, |
| "learning_rate": 1.6086226628226898e-07, |
| "loss": 0.0319, |
| "step": 18540 |
| }, |
| { |
| "epoch": 17.78523489932886, |
| "grad_norm": 0.1978902518749237, |
| "learning_rate": 1.5396266349574362e-07, |
| "loss": 0.0269, |
| "step": 18550 |
| }, |
| { |
| "epoch": 17.79482262703739, |
| "grad_norm": 0.5722432732582092, |
| "learning_rate": 1.4721406033177954e-07, |
| "loss": 0.0291, |
| "step": 18560 |
| }, |
| { |
| "epoch": 17.804410354745926, |
| "grad_norm": 0.29033163189888, |
| "learning_rate": 1.4061647723405125e-07, |
| "loss": 0.0288, |
| "step": 18570 |
| }, |
| { |
| "epoch": 17.813998082454457, |
| "grad_norm": 0.19131603837013245, |
| "learning_rate": 1.3416993418874924e-07, |
| "loss": 0.0247, |
| "step": 18580 |
| }, |
| { |
| "epoch": 17.823585810162992, |
| "grad_norm": 0.25687092542648315, |
| "learning_rate": 1.2787445072452998e-07, |
| "loss": 0.0267, |
| "step": 18590 |
| }, |
| { |
| "epoch": 17.833173537871524, |
| "grad_norm": 0.16243956983089447, |
| "learning_rate": 1.217300459124271e-07, |
| "loss": 0.0273, |
| "step": 18600 |
| }, |
| { |
| "epoch": 17.84276126558006, |
| "grad_norm": 0.17303957045078278, |
| "learning_rate": 1.1573673836580701e-07, |
| "loss": 0.0353, |
| "step": 18610 |
| }, |
| { |
| "epoch": 17.85234899328859, |
| "grad_norm": 0.4954906702041626, |
| "learning_rate": 1.0989454624032448e-07, |
| "loss": 0.0239, |
| "step": 18620 |
| }, |
| { |
| "epoch": 17.861936720997125, |
| "grad_norm": 0.500385582447052, |
| "learning_rate": 1.0420348723385043e-07, |
| "loss": 0.0279, |
| "step": 18630 |
| }, |
| { |
| "epoch": 17.871524448705657, |
| "grad_norm": 0.28065744042396545, |
| "learning_rate": 9.866357858642205e-08, |
| "loss": 0.024, |
| "step": 18640 |
| }, |
| { |
| "epoch": 17.88111217641419, |
| "grad_norm": 0.22515705227851868, |
| "learning_rate": 9.32748370802039e-08, |
| "loss": 0.0273, |
| "step": 18650 |
| }, |
| { |
| "epoch": 17.890699904122723, |
| "grad_norm": 0.4083874225616455, |
| "learning_rate": 8.803727903942127e-08, |
| "loss": 0.0269, |
| "step": 18660 |
| }, |
| { |
| "epoch": 17.900287631831254, |
| "grad_norm": 0.3455846309661865, |
| "learning_rate": 8.295092033031027e-08, |
| "loss": 0.0277, |
| "step": 18670 |
| }, |
| { |
| "epoch": 17.90987535953979, |
| "grad_norm": 0.15052051842212677, |
| "learning_rate": 7.801577636108448e-08, |
| "loss": 0.0358, |
| "step": 18680 |
| }, |
| { |
| "epoch": 17.91946308724832, |
| "grad_norm": 0.21173402667045593, |
| "learning_rate": 7.323186208188504e-08, |
| "loss": 0.0256, |
| "step": 18690 |
| }, |
| { |
| "epoch": 17.929050814956856, |
| "grad_norm": 0.3735136389732361, |
| "learning_rate": 6.859919198470288e-08, |
| "loss": 0.031, |
| "step": 18700 |
| }, |
| { |
| "epoch": 17.938638542665387, |
| "grad_norm": 0.2103312462568283, |
| "learning_rate": 6.411778010340097e-08, |
| "loss": 0.0322, |
| "step": 18710 |
| }, |
| { |
| "epoch": 17.948226270373922, |
| "grad_norm": 0.19569391012191772, |
| "learning_rate": 5.978764001359771e-08, |
| "loss": 0.0291, |
| "step": 18720 |
| }, |
| { |
| "epoch": 17.957813998082454, |
| "grad_norm": 0.25286465883255005, |
| "learning_rate": 5.5608784832683616e-08, |
| "loss": 0.0277, |
| "step": 18730 |
| }, |
| { |
| "epoch": 17.96740172579099, |
| "grad_norm": 0.2856442332267761, |
| "learning_rate": 5.158122721974357e-08, |
| "loss": 0.0254, |
| "step": 18740 |
| }, |
| { |
| "epoch": 17.97698945349952, |
| "grad_norm": 0.15211383998394012, |
| "learning_rate": 4.770497937554574e-08, |
| "loss": 0.024, |
| "step": 18750 |
| }, |
| { |
| "epoch": 17.986577181208055, |
| "grad_norm": 0.28586897253990173, |
| "learning_rate": 4.398005304248609e-08, |
| "loss": 0.0239, |
| "step": 18760 |
| }, |
| { |
| "epoch": 17.996164908916587, |
| "grad_norm": 0.18181052803993225, |
| "learning_rate": 4.0406459504555016e-08, |
| "loss": 0.0236, |
| "step": 18770 |
| }, |
| { |
| "epoch": 18.005752636625118, |
| "grad_norm": 0.19704671204090118, |
| "learning_rate": 3.698420958732074e-08, |
| "loss": 0.0251, |
| "step": 18780 |
| }, |
| { |
| "epoch": 18.015340364333653, |
| "grad_norm": 0.19747470319271088, |
| "learning_rate": 3.371331365786823e-08, |
| "loss": 0.0313, |
| "step": 18790 |
| }, |
| { |
| "epoch": 18.024928092042185, |
| "grad_norm": 0.23974737524986267, |
| "learning_rate": 3.05937816247992e-08, |
| "loss": 0.0334, |
| "step": 18800 |
| }, |
| { |
| "epoch": 18.03451581975072, |
| "grad_norm": 0.31815865635871887, |
| "learning_rate": 2.7625622938165507e-08, |
| "loss": 0.025, |
| "step": 18810 |
| }, |
| { |
| "epoch": 18.04410354745925, |
| "grad_norm": 0.14651015400886536, |
| "learning_rate": 2.4808846589474687e-08, |
| "loss": 0.0252, |
| "step": 18820 |
| }, |
| { |
| "epoch": 18.053691275167786, |
| "grad_norm": 0.31359338760375977, |
| "learning_rate": 2.214346111164556e-08, |
| "loss": 0.0255, |
| "step": 18830 |
| }, |
| { |
| "epoch": 18.063279002876317, |
| "grad_norm": 0.3521699607372284, |
| "learning_rate": 1.9629474578986008e-08, |
| "loss": 0.0229, |
| "step": 18840 |
| }, |
| { |
| "epoch": 18.072866730584852, |
| "grad_norm": 0.2816530168056488, |
| "learning_rate": 1.726689460716524e-08, |
| "loss": 0.0262, |
| "step": 18850 |
| }, |
| { |
| "epoch": 18.082454458293384, |
| "grad_norm": 0.27596089243888855, |
| "learning_rate": 1.5055728353191578e-08, |
| "loss": 0.0266, |
| "step": 18860 |
| }, |
| { |
| "epoch": 18.09204218600192, |
| "grad_norm": 0.25768667459487915, |
| "learning_rate": 1.2995982515406901e-08, |
| "loss": 0.0273, |
| "step": 18870 |
| }, |
| { |
| "epoch": 18.10162991371045, |
| "grad_norm": 0.13152585923671722, |
| "learning_rate": 1.1087663333431141e-08, |
| "loss": 0.0268, |
| "step": 18880 |
| }, |
| { |
| "epoch": 18.111217641418985, |
| "grad_norm": 0.1559949666261673, |
| "learning_rate": 9.330776588184487e-09, |
| "loss": 0.0307, |
| "step": 18890 |
| }, |
| { |
| "epoch": 18.120805369127517, |
| "grad_norm": 0.25546255707740784, |
| "learning_rate": 7.725327601826315e-09, |
| "loss": 0.0254, |
| "step": 18900 |
| }, |
| { |
| "epoch": 18.13039309683605, |
| "grad_norm": 0.17455005645751953, |
| "learning_rate": 6.271321237788508e-09, |
| "loss": 0.0331, |
| "step": 18910 |
| }, |
| { |
| "epoch": 18.139980824544583, |
| "grad_norm": 0.25416553020477295, |
| "learning_rate": 4.9687619007199316e-09, |
| "loss": 0.0332, |
| "step": 18920 |
| }, |
| { |
| "epoch": 18.149568552253115, |
| "grad_norm": 0.19471152126789093, |
| "learning_rate": 3.817653536480892e-09, |
| "loss": 0.0248, |
| "step": 18930 |
| }, |
| { |
| "epoch": 18.15915627996165, |
| "grad_norm": 0.26644882559776306, |
| "learning_rate": 2.8179996321597845e-09, |
| "loss": 0.0248, |
| "step": 18940 |
| }, |
| { |
| "epoch": 18.16874400767018, |
| "grad_norm": 0.18680621683597565, |
| "learning_rate": 1.9698032160231363e-09, |
| "loss": 0.0252, |
| "step": 18950 |
| }, |
| { |
| "epoch": 18.178331735378716, |
| "grad_norm": 0.22466066479682922, |
| "learning_rate": 1.2730668575322569e-09, |
| "loss": 0.0221, |
| "step": 18960 |
| }, |
| { |
| "epoch": 18.187919463087248, |
| "grad_norm": 0.27246662974357605, |
| "learning_rate": 7.277926673210367e-10, |
| "loss": 0.0258, |
| "step": 18970 |
| }, |
| { |
| "epoch": 18.197507190795783, |
| "grad_norm": 0.17329837381839752, |
| "learning_rate": 3.3398229720149607e-10, |
| "loss": 0.0266, |
| "step": 18980 |
| }, |
| { |
| "epoch": 18.207094918504314, |
| "grad_norm": 0.3577910363674164, |
| "learning_rate": 9.163694015268398e-11, |
| "loss": 0.0296, |
| "step": 18990 |
| }, |
| { |
| "epoch": 18.21668264621285, |
| "grad_norm": 0.24373145401477814, |
| "learning_rate": 7.57330315126481e-13, |
| "loss": 0.029, |
| "step": 19000 |
| }, |
| { |
| "epoch": 18.21668264621285, |
| "step": 19000, |
| "total_flos": 0.0, |
| "train_loss": 0.04628433942951654, |
| "train_runtime": 5633.6681, |
| "train_samples_per_second": 107.923, |
| "train_steps_per_second": 3.373 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 19000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 19, |
| "save_steps": 20000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|