{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 8004,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
| { | |
| "epoch": 0.0037488284910965324, | |
| "grad_norm": 5.261307048988544, | |
| "learning_rate": 1.1235955056179776e-07, | |
| "loss": 0.6121, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.007497656982193065, | |
| "grad_norm": 5.15614665529525, | |
| "learning_rate": 2.3720349563046193e-07, | |
| "loss": 0.609, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.011246485473289597, | |
| "grad_norm": 4.807893824154755, | |
| "learning_rate": 3.620474406991261e-07, | |
| "loss": 0.6041, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01499531396438613, | |
| "grad_norm": 3.702189980946651, | |
| "learning_rate": 4.868913857677903e-07, | |
| "loss": 0.5865, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01874414245548266, | |
| "grad_norm": 3.348870199432412, | |
| "learning_rate": 6.117353308364544e-07, | |
| "loss": 0.5738, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.022492970946579195, | |
| "grad_norm": 1.3717180565873228, | |
| "learning_rate": 7.365792759051186e-07, | |
| "loss": 0.5303, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.026241799437675725, | |
| "grad_norm": 1.2344497017475258, | |
| "learning_rate": 8.614232209737828e-07, | |
| "loss": 0.5046, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02999062792877226, | |
| "grad_norm": 0.9972181073865652, | |
| "learning_rate": 9.86267166042447e-07, | |
| "loss": 0.4776, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.033739456419868794, | |
| "grad_norm": 0.5107539966752407, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 0.4698, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03748828491096532, | |
| "grad_norm": 0.38982019488173025, | |
| "learning_rate": 1.2359550561797752e-06, | |
| "loss": 0.464, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.041237113402061855, | |
| "grad_norm": 0.3325139789027463, | |
| "learning_rate": 1.3607990012484395e-06, | |
| "loss": 0.4446, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04498594189315839, | |
| "grad_norm": 0.27751561949048015, | |
| "learning_rate": 1.4856429463171037e-06, | |
| "loss": 0.4386, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04873477038425492, | |
| "grad_norm": 0.26409663300412245, | |
| "learning_rate": 1.6104868913857679e-06, | |
| "loss": 0.4394, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.05248359887535145, | |
| "grad_norm": 0.25322270864108015, | |
| "learning_rate": 1.735330836454432e-06, | |
| "loss": 0.4347, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.056232427366447985, | |
| "grad_norm": 0.24385171974327005, | |
| "learning_rate": 1.8601747815230963e-06, | |
| "loss": 0.4232, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05998125585754452, | |
| "grad_norm": 0.2507796927254884, | |
| "learning_rate": 1.9850187265917605e-06, | |
| "loss": 0.4278, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06373008434864105, | |
| "grad_norm": 0.24135744232005513, | |
| "learning_rate": 2.1098626716604245e-06, | |
| "loss": 0.4194, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.06747891283973759, | |
| "grad_norm": 0.23616171555517834, | |
| "learning_rate": 2.234706616729089e-06, | |
| "loss": 0.4154, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07122774133083412, | |
| "grad_norm": 0.23673999658027137, | |
| "learning_rate": 2.359550561797753e-06, | |
| "loss": 0.4135, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.07497656982193064, | |
| "grad_norm": 0.23052101463745947, | |
| "learning_rate": 2.484394506866417e-06, | |
| "loss": 0.4129, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07872539831302718, | |
| "grad_norm": 0.2475027892360779, | |
| "learning_rate": 2.6092384519350818e-06, | |
| "loss": 0.4082, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08247422680412371, | |
| "grad_norm": 0.2455114921254245, | |
| "learning_rate": 2.7340823970037454e-06, | |
| "loss": 0.4052, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.08622305529522024, | |
| "grad_norm": 0.24513693066233658, | |
| "learning_rate": 2.8589263420724094e-06, | |
| "loss": 0.4046, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.08997188378631678, | |
| "grad_norm": 0.2375006603458473, | |
| "learning_rate": 2.9837702871410738e-06, | |
| "loss": 0.4046, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.09372071227741331, | |
| "grad_norm": 0.23868492253713985, | |
| "learning_rate": 3.1086142322097378e-06, | |
| "loss": 0.3975, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.09746954076850985, | |
| "grad_norm": 0.2326427861557998, | |
| "learning_rate": 3.233458177278402e-06, | |
| "loss": 0.3999, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10121836925960637, | |
| "grad_norm": 0.23731014220097904, | |
| "learning_rate": 3.358302122347066e-06, | |
| "loss": 0.3974, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.1049671977507029, | |
| "grad_norm": 0.2548285517151653, | |
| "learning_rate": 3.4831460674157306e-06, | |
| "loss": 0.3984, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.10871602624179943, | |
| "grad_norm": 0.2415337350559026, | |
| "learning_rate": 3.6079900124843946e-06, | |
| "loss": 0.3916, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.11246485473289597, | |
| "grad_norm": 0.26258238150885815, | |
| "learning_rate": 3.732833957553059e-06, | |
| "loss": 0.3906, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1162136832239925, | |
| "grad_norm": 0.26012016485357875, | |
| "learning_rate": 3.857677902621723e-06, | |
| "loss": 0.3889, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.11996251171508904, | |
| "grad_norm": 0.2569432757214338, | |
| "learning_rate": 3.9825218476903875e-06, | |
| "loss": 0.3873, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.12371134020618557, | |
| "grad_norm": 0.23813642779308677, | |
| "learning_rate": 4.107365792759052e-06, | |
| "loss": 0.3879, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1274601686972821, | |
| "grad_norm": 0.2620384468843467, | |
| "learning_rate": 4.2322097378277155e-06, | |
| "loss": 0.3841, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.13120899718837864, | |
| "grad_norm": 0.2544756212917662, | |
| "learning_rate": 4.35705368289638e-06, | |
| "loss": 0.3881, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.13495782567947517, | |
| "grad_norm": 0.23871678308328853, | |
| "learning_rate": 4.481897627965044e-06, | |
| "loss": 0.3819, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.1387066541705717, | |
| "grad_norm": 0.27307451356383844, | |
| "learning_rate": 4.606741573033709e-06, | |
| "loss": 0.3849, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.14245548266166824, | |
| "grad_norm": 0.2982815099279342, | |
| "learning_rate": 4.731585518102372e-06, | |
| "loss": 0.384, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.14620431115276475, | |
| "grad_norm": 0.27841785889657866, | |
| "learning_rate": 4.856429463171037e-06, | |
| "loss": 0.3797, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.14995313964386128, | |
| "grad_norm": 0.2727860073948262, | |
| "learning_rate": 4.9812734082397e-06, | |
| "loss": 0.3826, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.15370196813495782, | |
| "grad_norm": 0.25678949790726463, | |
| "learning_rate": 5.106117353308366e-06, | |
| "loss": 0.38, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.15745079662605435, | |
| "grad_norm": 0.31173528136138384, | |
| "learning_rate": 5.230961298377028e-06, | |
| "loss": 0.3775, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.16119962511715089, | |
| "grad_norm": 0.2952941783305471, | |
| "learning_rate": 5.355805243445693e-06, | |
| "loss": 0.3821, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.16494845360824742, | |
| "grad_norm": 0.2908243225639686, | |
| "learning_rate": 5.480649188514357e-06, | |
| "loss": 0.3767, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.16869728209934395, | |
| "grad_norm": 0.25818770698112115, | |
| "learning_rate": 5.6054931335830224e-06, | |
| "loss": 0.3753, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.1724461105904405, | |
| "grad_norm": 0.3211832737453995, | |
| "learning_rate": 5.730337078651685e-06, | |
| "loss": 0.3763, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.17619493908153702, | |
| "grad_norm": 0.3105251087664963, | |
| "learning_rate": 5.85518102372035e-06, | |
| "loss": 0.3758, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.17994376757263356, | |
| "grad_norm": 0.2764457811664437, | |
| "learning_rate": 5.980024968789014e-06, | |
| "loss": 0.3703, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.1836925960637301, | |
| "grad_norm": 0.28650193996460066, | |
| "learning_rate": 6.104868913857679e-06, | |
| "loss": 0.3772, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.18744142455482662, | |
| "grad_norm": 0.29068625167257756, | |
| "learning_rate": 6.229712858926342e-06, | |
| "loss": 0.3734, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.19119025304592316, | |
| "grad_norm": 0.3259945542580016, | |
| "learning_rate": 6.3545568039950064e-06, | |
| "loss": 0.3734, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.1949390815370197, | |
| "grad_norm": 0.2921563403060802, | |
| "learning_rate": 6.479400749063671e-06, | |
| "loss": 0.3708, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.19868791002811623, | |
| "grad_norm": 0.28384494127638144, | |
| "learning_rate": 6.6042446941323344e-06, | |
| "loss": 0.3703, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.20243673851921273, | |
| "grad_norm": 0.35311062890426737, | |
| "learning_rate": 6.729088639200999e-06, | |
| "loss": 0.3686, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.20618556701030927, | |
| "grad_norm": 0.3645818485106796, | |
| "learning_rate": 6.853932584269663e-06, | |
| "loss": 0.3665, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.2099343955014058, | |
| "grad_norm": 0.29958381926712185, | |
| "learning_rate": 6.978776529338328e-06, | |
| "loss": 0.3703, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.21368322399250234, | |
| "grad_norm": 0.2943930481951806, | |
| "learning_rate": 7.103620474406991e-06, | |
| "loss": 0.3686, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.21743205248359887, | |
| "grad_norm": 0.33743210095771675, | |
| "learning_rate": 7.228464419475656e-06, | |
| "loss": 0.364, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.2211808809746954, | |
| "grad_norm": 0.30279035276936334, | |
| "learning_rate": 7.35330836454432e-06, | |
| "loss": 0.3664, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.22492970946579194, | |
| "grad_norm": 0.3222972752742645, | |
| "learning_rate": 7.4781523096129846e-06, | |
| "loss": 0.3644, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.22867853795688847, | |
| "grad_norm": 0.38710479557491634, | |
| "learning_rate": 7.602996254681648e-06, | |
| "loss": 0.3647, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.232427366447985, | |
| "grad_norm": 0.37149432246081854, | |
| "learning_rate": 7.727840199750313e-06, | |
| "loss": 0.3654, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.23617619493908154, | |
| "grad_norm": 0.3783560186385295, | |
| "learning_rate": 7.852684144818978e-06, | |
| "loss": 0.362, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.23992502343017807, | |
| "grad_norm": 0.37006311884362064, | |
| "learning_rate": 7.97752808988764e-06, | |
| "loss": 0.3615, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.2436738519212746, | |
| "grad_norm": 0.34763658374372114, | |
| "learning_rate": 8.102372034956305e-06, | |
| "loss": 0.3632, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.24742268041237114, | |
| "grad_norm": 0.29073397245455634, | |
| "learning_rate": 8.22721598002497e-06, | |
| "loss": 0.3583, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2511715089034677, | |
| "grad_norm": 0.35203426090002876, | |
| "learning_rate": 8.352059925093634e-06, | |
| "loss": 0.3647, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2549203373945642, | |
| "grad_norm": 0.437139066758181, | |
| "learning_rate": 8.476903870162298e-06, | |
| "loss": 0.3585, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.25866916588566075, | |
| "grad_norm": 0.3142538887122429, | |
| "learning_rate": 8.601747815230963e-06, | |
| "loss": 0.3568, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.2624179943767573, | |
| "grad_norm": 0.4424671561181043, | |
| "learning_rate": 8.726591760299627e-06, | |
| "loss": 0.3592, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2661668228678538, | |
| "grad_norm": 0.4870824456249142, | |
| "learning_rate": 8.851435705368292e-06, | |
| "loss": 0.359, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.26991565135895035, | |
| "grad_norm": 0.37766488794403613, | |
| "learning_rate": 8.976279650436954e-06, | |
| "loss": 0.3573, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2736644798500469, | |
| "grad_norm": 0.37439610464624506, | |
| "learning_rate": 9.101123595505619e-06, | |
| "loss": 0.3618, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.2774133083411434, | |
| "grad_norm": 0.4004032917727574, | |
| "learning_rate": 9.225967540574283e-06, | |
| "loss": 0.361, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.28116213683223995, | |
| "grad_norm": 0.3373876121827577, | |
| "learning_rate": 9.350811485642946e-06, | |
| "loss": 0.359, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2849109653233365, | |
| "grad_norm": 0.36216174893173675, | |
| "learning_rate": 9.475655430711612e-06, | |
| "loss": 0.357, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.28865979381443296, | |
| "grad_norm": 0.42153204399869865, | |
| "learning_rate": 9.600499375780276e-06, | |
| "loss": 0.3577, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2924086223055295, | |
| "grad_norm": 0.35740754175627953, | |
| "learning_rate": 9.72534332084894e-06, | |
| "loss": 0.3547, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.29615745079662603, | |
| "grad_norm": 0.32327770051907684, | |
| "learning_rate": 9.850187265917604e-06, | |
| "loss": 0.357, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.29990627928772257, | |
| "grad_norm": 0.460840791598725, | |
| "learning_rate": 9.975031210986268e-06, | |
| "loss": 0.3556, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3036551077788191, | |
| "grad_norm": 0.42772161445538665, | |
| "learning_rate": 9.99996956365783e-06, | |
| "loss": 0.3552, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.30740393626991563, | |
| "grad_norm": 0.3837967533130784, | |
| "learning_rate": 9.999845916652828e-06, | |
| "loss": 0.3537, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.31115276476101217, | |
| "grad_norm": 0.3587190063419423, | |
| "learning_rate": 9.999627159063904e-06, | |
| "loss": 0.3522, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.3149015932521087, | |
| "grad_norm": 0.37997976078876, | |
| "learning_rate": 9.999313295052418e-06, | |
| "loss": 0.354, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.31865042174320524, | |
| "grad_norm": 0.33994105517602696, | |
| "learning_rate": 9.998904330588908e-06, | |
| "loss": 0.352, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.32239925023430177, | |
| "grad_norm": 0.3874016806923687, | |
| "learning_rate": 9.998400273452987e-06, | |
| "loss": 0.3528, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3261480787253983, | |
| "grad_norm": 0.3219534280341289, | |
| "learning_rate": 9.997801133233184e-06, | |
| "loss": 0.3534, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.32989690721649484, | |
| "grad_norm": 0.359125234314906, | |
| "learning_rate": 9.997106921326764e-06, | |
| "loss": 0.3537, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3336457357075914, | |
| "grad_norm": 0.3184315740360875, | |
| "learning_rate": 9.996317650939515e-06, | |
| "loss": 0.3528, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.3373945641986879, | |
| "grad_norm": 0.5591690891723508, | |
| "learning_rate": 9.995433337085492e-06, | |
| "loss": 0.3522, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.34114339268978444, | |
| "grad_norm": 0.3423810176135369, | |
| "learning_rate": 9.994453996586737e-06, | |
| "loss": 0.3501, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.344892221180881, | |
| "grad_norm": 0.40486938076977924, | |
| "learning_rate": 9.99337964807295e-06, | |
| "loss": 0.3496, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.3486410496719775, | |
| "grad_norm": 0.4088267861862101, | |
| "learning_rate": 9.992210311981148e-06, | |
| "loss": 0.357, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.35238987816307404, | |
| "grad_norm": 0.3795197690451968, | |
| "learning_rate": 9.99094601055526e-06, | |
| "loss": 0.3495, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.3561387066541706, | |
| "grad_norm": 0.37097930414190233, | |
| "learning_rate": 9.989586767845721e-06, | |
| "loss": 0.3469, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3598875351452671, | |
| "grad_norm": 0.3513423854824207, | |
| "learning_rate": 9.988132609708999e-06, | |
| "loss": 0.3457, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 0.36447802251950095, | |
| "learning_rate": 9.986583563807109e-06, | |
| "loss": 0.3486, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.3673851921274602, | |
| "grad_norm": 0.3729932757859573, | |
| "learning_rate": 9.984939659607095e-06, | |
| "loss": 0.3507, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.3711340206185567, | |
| "grad_norm": 0.29469670304955903, | |
| "learning_rate": 9.983200928380461e-06, | |
| "loss": 0.3459, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.37488284910965325, | |
| "grad_norm": 0.3654383892479426, | |
| "learning_rate": 9.981367403202569e-06, | |
| "loss": 0.3449, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3786316776007498, | |
| "grad_norm": 0.36618034043522385, | |
| "learning_rate": 9.979439118952026e-06, | |
| "loss": 0.3452, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.3823805060918463, | |
| "grad_norm": 0.3577145469220018, | |
| "learning_rate": 9.977416112310012e-06, | |
| "loss": 0.3464, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.38612933458294285, | |
| "grad_norm": 0.556582145142578, | |
| "learning_rate": 9.97529842175958e-06, | |
| "loss": 0.3446, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.3898781630740394, | |
| "grad_norm": 0.40442147020098534, | |
| "learning_rate": 9.973086087584929e-06, | |
| "loss": 0.3454, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.3936269915651359, | |
| "grad_norm": 0.34956913654247584, | |
| "learning_rate": 9.970779151870634e-06, | |
| "loss": 0.3454, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.39737582005623245, | |
| "grad_norm": 0.40777603350082636, | |
| "learning_rate": 9.96837765850085e-06, | |
| "loss": 0.3459, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.40112464854732893, | |
| "grad_norm": 0.38403660939167644, | |
| "learning_rate": 9.96588165315847e-06, | |
| "loss": 0.3464, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.40487347703842547, | |
| "grad_norm": 0.4080672955719013, | |
| "learning_rate": 9.963291183324264e-06, | |
| "loss": 0.3441, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.408622305529522, | |
| "grad_norm": 0.3685239162827897, | |
| "learning_rate": 9.960606298275968e-06, | |
| "loss": 0.3464, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.41237113402061853, | |
| "grad_norm": 0.3380919841293949, | |
| "learning_rate": 9.957827049087357e-06, | |
| "loss": 0.3433, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.41611996251171507, | |
| "grad_norm": 0.3315173931094776, | |
| "learning_rate": 9.954953488627258e-06, | |
| "loss": 0.3486, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.4198687910028116, | |
| "grad_norm": 0.4016275857570284, | |
| "learning_rate": 9.951985671558559e-06, | |
| "loss": 0.3426, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.42361761949390814, | |
| "grad_norm": 0.35593601511143413, | |
| "learning_rate": 9.948923654337167e-06, | |
| "loss": 0.3453, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.42736644798500467, | |
| "grad_norm": 0.32764050829507113, | |
| "learning_rate": 9.945767495210921e-06, | |
| "loss": 0.3427, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.4311152764761012, | |
| "grad_norm": 0.3022647428699131, | |
| "learning_rate": 9.942517254218503e-06, | |
| "loss": 0.3407, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.43486410496719774, | |
| "grad_norm": 0.33454020574954163, | |
| "learning_rate": 9.93917299318828e-06, | |
| "loss": 0.3411, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.4386129334582943, | |
| "grad_norm": 0.41746428448570383, | |
| "learning_rate": 9.935734775737136e-06, | |
| "loss": 0.3386, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.4423617619493908, | |
| "grad_norm": 0.4736902430441777, | |
| "learning_rate": 9.932202667269259e-06, | |
| "loss": 0.3437, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.44611059044048734, | |
| "grad_norm": 0.36913024892238444, | |
| "learning_rate": 9.928576734974903e-06, | |
| "loss": 0.3444, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.4498594189315839, | |
| "grad_norm": 0.3085872877246692, | |
| "learning_rate": 9.924857047829097e-06, | |
| "loss": 0.3407, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4536082474226804, | |
| "grad_norm": 0.5014477248612932, | |
| "learning_rate": 9.92104367659035e-06, | |
| "loss": 0.3402, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.45735707591377694, | |
| "grad_norm": 0.37208177374221185, | |
| "learning_rate": 9.917136693799287e-06, | |
| "loss": 0.3443, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.4611059044048735, | |
| "grad_norm": 0.4736108253772323, | |
| "learning_rate": 9.91313617377728e-06, | |
| "loss": 0.3402, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.46485473289597, | |
| "grad_norm": 0.3989185456576625, | |
| "learning_rate": 9.909042192625038e-06, | |
| "loss": 0.3387, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.46860356138706655, | |
| "grad_norm": 0.4452446526616405, | |
| "learning_rate": 9.904854828221142e-06, | |
| "loss": 0.3444, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.4723523898781631, | |
| "grad_norm": 0.32502934619385765, | |
| "learning_rate": 9.900574160220589e-06, | |
| "loss": 0.3349, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.4761012183692596, | |
| "grad_norm": 0.3626693442578511, | |
| "learning_rate": 9.896200270053248e-06, | |
| "loss": 0.3414, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.47985004686035615, | |
| "grad_norm": 0.32869562107129774, | |
| "learning_rate": 9.891733240922336e-06, | |
| "loss": 0.3422, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.4835988753514527, | |
| "grad_norm": 0.37637832215175443, | |
| "learning_rate": 9.887173157802823e-06, | |
| "loss": 0.3366, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.4873477038425492, | |
| "grad_norm": 0.3508655971923107, | |
| "learning_rate": 9.882520107439813e-06, | |
| "loss": 0.3386, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.49109653233364575, | |
| "grad_norm": 0.34109879872339993, | |
| "learning_rate": 9.877774178346901e-06, | |
| "loss": 0.3363, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.4948453608247423, | |
| "grad_norm": 0.32811254268165824, | |
| "learning_rate": 9.87293546080449e-06, | |
| "loss": 0.3395, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.4985941893158388, | |
| "grad_norm": 0.4254861700788238, | |
| "learning_rate": 9.868004046858063e-06, | |
| "loss": 0.3371, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.5023430178069354, | |
| "grad_norm": 0.3785996603686958, | |
| "learning_rate": 9.862980030316445e-06, | |
| "loss": 0.3375, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5060918462980318, | |
| "grad_norm": 0.3514746852402266, | |
| "learning_rate": 9.857863506750008e-06, | |
| "loss": 0.3373, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5098406747891284, | |
| "grad_norm": 0.39720746535746887, | |
| "learning_rate": 9.852654573488865e-06, | |
| "loss": 0.3361, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.5135895032802249, | |
| "grad_norm": 0.5262745224774735, | |
| "learning_rate": 9.847353329621001e-06, | |
| "loss": 0.338, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.5173383317713215, | |
| "grad_norm": 0.39442611541662853, | |
| "learning_rate": 9.841959875990406e-06, | |
| "loss": 0.3354, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.521087160262418, | |
| "grad_norm": 0.4302778358785363, | |
| "learning_rate": 9.836474315195148e-06, | |
| "loss": 0.3354, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.5248359887535146, | |
| "grad_norm": 0.35819495417141983, | |
| "learning_rate": 9.830896751585419e-06, | |
| "loss": 0.3385, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.528584817244611, | |
| "grad_norm": 0.4018817834079902, | |
| "learning_rate": 9.825227291261555e-06, | |
| "loss": 0.3372, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.5323336457357076, | |
| "grad_norm": 0.45163931451375233, | |
| "learning_rate": 9.819466042072016e-06, | |
| "loss": 0.3361, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.5360824742268041, | |
| "grad_norm": 0.41628377691191, | |
| "learning_rate": 9.813613113611336e-06, | |
| "loss": 0.3335, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5398313027179007, | |
| "grad_norm": 0.32539571890495544, | |
| "learning_rate": 9.807668617218033e-06, | |
| "loss": 0.3357, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5435801312089972, | |
| "grad_norm": 0.3226911203286496, | |
| "learning_rate": 9.801632665972496e-06, | |
| "loss": 0.3369, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5473289597000938, | |
| "grad_norm": 0.39760989762825094, | |
| "learning_rate": 9.795505374694833e-06, | |
| "loss": 0.3361, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5510777881911902, | |
| "grad_norm": 0.4106794192966664, | |
| "learning_rate": 9.78928685994269e-06, | |
| "loss": 0.3334, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.5548266166822868, | |
| "grad_norm": 0.34122967448980807, | |
| "learning_rate": 9.78297724000902e-06, | |
| "loss": 0.3338, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.5585754451733833, | |
| "grad_norm": 0.3568238195196395, | |
| "learning_rate": 9.776576634919853e-06, | |
| "loss": 0.3337, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.5623242736644799, | |
| "grad_norm": 0.4345852471251167, | |
| "learning_rate": 9.770085166431998e-06, | |
| "loss": 0.3302, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5660731021555764, | |
| "grad_norm": 0.3501750906447824, | |
| "learning_rate": 9.763502958030733e-06, | |
| "loss": 0.3336, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.569821930646673, | |
| "grad_norm": 0.4280185568028667, | |
| "learning_rate": 9.75683013492745e-06, | |
| "loss": 0.3345, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.5735707591377694, | |
| "grad_norm": 0.33687559054684085, | |
| "learning_rate": 9.750066824057286e-06, | |
| "loss": 0.3319, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.5773195876288659, | |
| "grad_norm": 0.4603527064195899, | |
| "learning_rate": 9.74321315407669e-06, | |
| "loss": 0.3362, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.5810684161199625, | |
| "grad_norm": 0.37047815009428875, | |
| "learning_rate": 9.736269255360993e-06, | |
| "loss": 0.3308, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.584817244611059, | |
| "grad_norm": 0.3618655285759886, | |
| "learning_rate": 9.729235260001919e-06, | |
| "loss": 0.333, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.5885660731021556, | |
| "grad_norm": 0.3818659087459316, | |
| "learning_rate": 9.72211130180507e-06, | |
| "loss": 0.3323, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.5923149015932521, | |
| "grad_norm": 0.3996155158067246, | |
| "learning_rate": 9.714897516287392e-06, | |
| "loss": 0.3346, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.5960637300843487, | |
| "grad_norm": 0.37123696273112744, | |
| "learning_rate": 9.707594040674577e-06, | |
| "loss": 0.3361, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.5998125585754451, | |
| "grad_norm": 0.3452230221109262, | |
| "learning_rate": 9.700201013898478e-06, | |
| "loss": 0.3349, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6035613870665417, | |
| "grad_norm": 0.500409003679961, | |
| "learning_rate": 9.692718576594447e-06, | |
| "loss": 0.3323, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.6073102155576382, | |
| "grad_norm": 0.5303403579247664, | |
| "learning_rate": 9.685146871098663e-06, | |
| "loss": 0.3303, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.6110590440487348, | |
| "grad_norm": 0.3376647352530898, | |
| "learning_rate": 9.677486041445436e-06, | |
| "loss": 0.3329, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.6148078725398313, | |
| "grad_norm": 0.41119673564751275, | |
| "learning_rate": 9.669736233364448e-06, | |
| "loss": 0.3349, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.6185567010309279, | |
| "grad_norm": 0.3574831617883723, | |
| "learning_rate": 9.661897594278e-06, | |
| "loss": 0.3345, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6223055295220243, | |
| "grad_norm": 0.3748467608566312, | |
| "learning_rate": 9.653970273298197e-06, | |
| "loss": 0.3325, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.6260543580131209, | |
| "grad_norm": 0.3884786451043299, | |
| "learning_rate": 9.645954421224106e-06, | |
| "loss": 0.3316, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.6298031865042174, | |
| "grad_norm": 0.35595037812559693, | |
| "learning_rate": 9.637850190538904e-06, | |
| "loss": 0.3268, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.633552014995314, | |
| "grad_norm": 0.34174290421878073, | |
| "learning_rate": 9.629657735406964e-06, | |
| "loss": 0.3279, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.6373008434864105, | |
| "grad_norm": 0.36083476878974785, | |
| "learning_rate": 9.621377211670926e-06, | |
| "loss": 0.3307, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6410496719775071, | |
| "grad_norm": 0.3289272824820436, | |
| "learning_rate": 9.613008776848734e-06, | |
| "loss": 0.329, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.6447985004686035, | |
| "grad_norm": 0.35744054622945465, | |
| "learning_rate": 9.604552590130638e-06, | |
| "loss": 0.3291, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.6485473289597001, | |
| "grad_norm": 0.43446933745858823, | |
| "learning_rate": 9.596008812376167e-06, | |
| "loss": 0.3314, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.6522961574507966, | |
| "grad_norm": 0.38294138333824385, | |
| "learning_rate": 9.587377606111067e-06, | |
| "loss": 0.3259, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.6560449859418932, | |
| "grad_norm": 0.3524604957942587, | |
| "learning_rate": 9.578659135524214e-06, | |
| "loss": 0.3354, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.6597938144329897, | |
| "grad_norm": 0.42690501583378293, | |
| "learning_rate": 9.569853566464482e-06, | |
| "loss": 0.3282, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.6635426429240863, | |
| "grad_norm": 0.48509167399052666, | |
| "learning_rate": 9.560961066437595e-06, | |
| "loss": 0.3328, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.6672914714151827, | |
| "grad_norm": 0.3313886004708506, | |
| "learning_rate": 9.551981804602943e-06, | |
| "loss": 0.327, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.6710402999062793, | |
| "grad_norm": 0.4278444007339863, | |
| "learning_rate": 9.542915951770356e-06, | |
| "loss": 0.3298, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.6747891283973758, | |
| "grad_norm": 0.43003182010366847, | |
| "learning_rate": 9.533763680396857e-06, | |
| "loss": 0.3299, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6785379568884724, | |
| "grad_norm": 0.38848251404003764, | |
| "learning_rate": 9.524525164583389e-06, | |
| "loss": 0.3299, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.6822867853795689, | |
| "grad_norm": 0.3812957689804934, | |
| "learning_rate": 9.515200580071495e-06, | |
| "loss": 0.3303, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.6860356138706654, | |
| "grad_norm": 0.5538363802153154, | |
| "learning_rate": 9.505790104239975e-06, | |
| "loss": 0.3289, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.689784442361762, | |
| "grad_norm": 0.4099362555822008, | |
| "learning_rate": 9.496293916101516e-06, | |
| "loss": 0.3303, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.6935332708528584, | |
| "grad_norm": 0.511739746908337, | |
| "learning_rate": 9.486712196299285e-06, | |
| "loss": 0.3312, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.697282099343955, | |
| "grad_norm": 0.38232455442549296, | |
| "learning_rate": 9.477045127103495e-06, | |
| "loss": 0.3305, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.7010309278350515, | |
| "grad_norm": 0.46907822057098875, | |
| "learning_rate": 9.467292892407926e-06, | |
| "loss": 0.327, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.7047797563261481, | |
| "grad_norm": 0.333174592156295, | |
| "learning_rate": 9.457455677726447e-06, | |
| "loss": 0.3276, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.7085285848172446, | |
| "grad_norm": 0.4504078695656366, | |
| "learning_rate": 9.447533670189472e-06, | |
| "loss": 0.3298, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.7122774133083412, | |
| "grad_norm": 0.36606560970104657, | |
| "learning_rate": 9.437527058540398e-06, | |
| "loss": 0.3317, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7160262417994376, | |
| "grad_norm": 0.31085511164293034, | |
| "learning_rate": 9.427436033132033e-06, | |
| "loss": 0.3256, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.7197750702905342, | |
| "grad_norm": 0.3485865828995088, | |
| "learning_rate": 9.417260785922953e-06, | |
| "loss": 0.3262, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.7235238987816307, | |
| "grad_norm": 0.45177189070417023, | |
| "learning_rate": 9.407001510473861e-06, | |
| "loss": 0.3282, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 0.4145405256703436, | |
| "learning_rate": 9.396658401943913e-06, | |
| "loss": 0.3269, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.7310215557638238, | |
| "grad_norm": 0.39368332737774225, | |
| "learning_rate": 9.386231657086984e-06, | |
| "loss": 0.3267, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7347703842549204, | |
| "grad_norm": 0.4086874229698859, | |
| "learning_rate": 9.37572147424795e-06, | |
| "loss": 0.3301, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.7385192127460168, | |
| "grad_norm": 0.41879040405476353, | |
| "learning_rate": 9.365128053358896e-06, | |
| "loss": 0.3267, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.7422680412371134, | |
| "grad_norm": 0.48491627309563456, | |
| "learning_rate": 9.35445159593532e-06, | |
| "loss": 0.3279, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.7460168697282099, | |
| "grad_norm": 0.3400392892381988, | |
| "learning_rate": 9.343692305072302e-06, | |
| "loss": 0.3257, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.7497656982193065, | |
| "grad_norm": 0.38335608109171726, | |
| "learning_rate": 9.332850385440637e-06, | |
| "loss": 0.3272, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.753514526710403, | |
| "grad_norm": 0.3872685747801623, | |
| "learning_rate": 9.32192604328294e-06, | |
| "loss": 0.3303, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.7572633552014996, | |
| "grad_norm": 0.43431054195807284, | |
| "learning_rate": 9.31091948640973e-06, | |
| "loss": 0.3252, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.761012183692596, | |
| "grad_norm": 0.3987919949548056, | |
| "learning_rate": 9.299830924195468e-06, | |
| "loss": 0.3295, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.7647610121836926, | |
| "grad_norm": 0.43495362781321384, | |
| "learning_rate": 9.28866056757458e-06, | |
| "loss": 0.3271, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.7685098406747891, | |
| "grad_norm": 0.5319949922755799, | |
| "learning_rate": 9.277408629037442e-06, | |
| "loss": 0.3211, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.7722586691658857, | |
| "grad_norm": 0.4399645846756319, | |
| "learning_rate": 9.266075322626338e-06, | |
| "loss": 0.3252, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.7760074976569822, | |
| "grad_norm": 0.6456342041580845, | |
| "learning_rate": 9.254660863931392e-06, | |
| "loss": 0.3252, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.7797563261480788, | |
| "grad_norm": 0.36547086873410367, | |
| "learning_rate": 9.243165470086463e-06, | |
| "loss": 0.3264, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.7835051546391752, | |
| "grad_norm": 0.3872999565414706, | |
| "learning_rate": 9.23158935976501e-06, | |
| "loss": 0.3253, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.7872539831302718, | |
| "grad_norm": 0.35985858368454743, | |
| "learning_rate": 9.219932753175944e-06, | |
| "loss": 0.3252, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.7910028116213683, | |
| "grad_norm": 0.4412647339459317, | |
| "learning_rate": 9.208195872059429e-06, | |
| "loss": 0.324, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.7947516401124649, | |
| "grad_norm": 0.3973882373774354, | |
| "learning_rate": 9.19637893968267e-06, | |
| "loss": 0.3261, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.7985004686035614, | |
| "grad_norm": 0.357118509545678, | |
| "learning_rate": 9.184482180835662e-06, | |
| "loss": 0.3238, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.8022492970946579, | |
| "grad_norm": 0.3223398993083127, | |
| "learning_rate": 9.172505821826911e-06, | |
| "loss": 0.325, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.8059981255857545, | |
| "grad_norm": 0.3256784650503759, | |
| "learning_rate": 9.160450090479144e-06, | |
| "loss": 0.3229, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.8097469540768509, | |
| "grad_norm": 0.550415905591969, | |
| "learning_rate": 9.148315216124954e-06, | |
| "loss": 0.3275, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.8134957825679475, | |
| "grad_norm": 0.42518457436926477, | |
| "learning_rate": 9.136101429602451e-06, | |
| "loss": 0.327, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.817244611059044, | |
| "grad_norm": 0.4261965775006577, | |
| "learning_rate": 9.123808963250873e-06, | |
| "loss": 0.3225, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.8209934395501406, | |
| "grad_norm": 0.43150733510100164, | |
| "learning_rate": 9.111438050906151e-06, | |
| "loss": 0.3279, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.8247422680412371, | |
| "grad_norm": 0.37038125521844484, | |
| "learning_rate": 9.09898892789648e-06, | |
| "loss": 0.324, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8284910965323337, | |
| "grad_norm": 0.40692784507847907, | |
| "learning_rate": 9.08646183103783e-06, | |
| "loss": 0.3296, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.8322399250234301, | |
| "grad_norm": 0.386868671740811, | |
| "learning_rate": 9.07385699862944e-06, | |
| "loss": 0.3215, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.8359887535145267, | |
| "grad_norm": 0.3350868838762515, | |
| "learning_rate": 9.061174670449298e-06, | |
| "loss": 0.3246, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.8397375820056232, | |
| "grad_norm": 0.34427971717421035, | |
| "learning_rate": 9.048415087749565e-06, | |
| "loss": 0.3242, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.8434864104967198, | |
| "grad_norm": 0.3856288956960091, | |
| "learning_rate": 9.03557849325199e-06, | |
| "loss": 0.326, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.8472352389878163, | |
| "grad_norm": 0.39608051805449296, | |
| "learning_rate": 9.022665131143303e-06, | |
| "loss": 0.3229, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.8509840674789129, | |
| "grad_norm": 0.41794409702724045, | |
| "learning_rate": 9.00967524707055e-06, | |
| "loss": 0.3232, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.8547328959700093, | |
| "grad_norm": 0.43169401522169315, | |
| "learning_rate": 8.996609088136444e-06, | |
| "loss": 0.3255, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.8584817244611059, | |
| "grad_norm": 0.35305226663749295, | |
| "learning_rate": 8.98346690289464e-06, | |
| "loss": 0.3222, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.8622305529522024, | |
| "grad_norm": 0.40183352362359215, | |
| "learning_rate": 8.970248941345028e-06, | |
| "loss": 0.3201, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.865979381443299, | |
| "grad_norm": 0.37025187300501505, | |
| "learning_rate": 8.956955454928966e-06, | |
| "loss": 0.3226, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.8697282099343955, | |
| "grad_norm": 0.4229158348114115, | |
| "learning_rate": 8.943586696524495e-06, | |
| "loss": 0.3227, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.8734770384254921, | |
| "grad_norm": 0.33306767880493077, | |
| "learning_rate": 8.930142920441536e-06, | |
| "loss": 0.3233, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.8772258669165885, | |
| "grad_norm": 0.3664337194336669, | |
| "learning_rate": 8.916624382417052e-06, | |
| "loss": 0.3244, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.8809746954076851, | |
| "grad_norm": 0.39368217476089484, | |
| "learning_rate": 8.903031339610172e-06, | |
| "loss": 0.3205, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.8847235238987816, | |
| "grad_norm": 0.35736567969388083, | |
| "learning_rate": 8.889364050597315e-06, | |
| "loss": 0.3208, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.8884723523898782, | |
| "grad_norm": 0.33000582790021804, | |
| "learning_rate": 8.87562277536726e-06, | |
| "loss": 0.3224, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.8922211808809747, | |
| "grad_norm": 0.4427745690627731, | |
| "learning_rate": 8.861807775316205e-06, | |
| "loss": 0.3203, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.8959700093720713, | |
| "grad_norm": 0.4066534130717824, | |
| "learning_rate": 8.847919313242792e-06, | |
| "loss": 0.3211, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.8997188378631678, | |
| "grad_norm": 0.34844243986285434, | |
| "learning_rate": 8.833957653343112e-06, | |
| "loss": 0.3199, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9034676663542643, | |
| "grad_norm": 0.3594011818287422, | |
| "learning_rate": 8.819923061205674e-06, | |
| "loss": 0.321, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.9072164948453608, | |
| "grad_norm": 0.34488001664929596, | |
| "learning_rate": 8.805815803806353e-06, | |
| "loss": 0.3255, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.9109653233364574, | |
| "grad_norm": 0.3409284776369216, | |
| "learning_rate": 8.791636149503322e-06, | |
| "loss": 0.3195, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.9147141518275539, | |
| "grad_norm": 0.3853629161293025, | |
| "learning_rate": 8.777384368031929e-06, | |
| "loss": 0.3216, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.9184629803186504, | |
| "grad_norm": 0.34448988964699645, | |
| "learning_rate": 8.763060730499582e-06, | |
| "loss": 0.322, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.922211808809747, | |
| "grad_norm": 0.3382752164401171, | |
| "learning_rate": 8.748665509380582e-06, | |
| "loss": 0.3213, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.9259606373008434, | |
| "grad_norm": 0.3396778964486745, | |
| "learning_rate": 8.73419897851095e-06, | |
| "loss": 0.3236, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.92970946579194, | |
| "grad_norm": 0.37725271996102505, | |
| "learning_rate": 8.7196614130832e-06, | |
| "loss": 0.3189, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.9334582942830365, | |
| "grad_norm": 0.32462914306061436, | |
| "learning_rate": 8.705053089641125e-06, | |
| "loss": 0.3221, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.9372071227741331, | |
| "grad_norm": 0.4369048990730118, | |
| "learning_rate": 8.690374286074522e-06, | |
| "loss": 0.3188, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.9409559512652296, | |
| "grad_norm": 0.3760926470610608, | |
| "learning_rate": 8.675625281613914e-06, | |
| "loss": 0.3212, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.9447047797563262, | |
| "grad_norm": 0.34740401778415597, | |
| "learning_rate": 8.660806356825226e-06, | |
| "loss": 0.3248, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.9484536082474226, | |
| "grad_norm": 0.417666450800287, | |
| "learning_rate": 8.64591779360447e-06, | |
| "loss": 0.3233, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.9522024367385192, | |
| "grad_norm": 0.42158210771195226, | |
| "learning_rate": 8.63095987517236e-06, | |
| "loss": 0.3221, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.9559512652296157, | |
| "grad_norm": 0.4056842055888519, | |
| "learning_rate": 8.615932886068936e-06, | |
| "loss": 0.3244, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.9597000937207123, | |
| "grad_norm": 0.2986528504566679, | |
| "learning_rate": 8.600837112148147e-06, | |
| "loss": 0.3244, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.9634489222118088, | |
| "grad_norm": 0.4221810983370233, | |
| "learning_rate": 8.585672840572418e-06, | |
| "loss": 0.3171, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.9671977507029054, | |
| "grad_norm": 0.32724721456300626, | |
| "learning_rate": 8.570440359807185e-06, | |
| "loss": 0.3221, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.9709465791940018, | |
| "grad_norm": 0.4057823948549215, | |
| "learning_rate": 8.555139959615404e-06, | |
| "loss": 0.3161, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.9746954076850984, | |
| "grad_norm": 0.30760675235109797, | |
| "learning_rate": 8.539771931052042e-06, | |
| "loss": 0.3202, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.9784442361761949, | |
| "grad_norm": 0.4037579844772881, | |
| "learning_rate": 8.524336566458546e-06, | |
| "loss": 0.3189, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.9821930646672915, | |
| "grad_norm": 0.3410849671328779, | |
| "learning_rate": 8.50883415945727e-06, | |
| "loss": 0.3196, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.985941893158388, | |
| "grad_norm": 0.3450287677089191, | |
| "learning_rate": 8.493265004945896e-06, | |
| "loss": 0.3221, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.9896907216494846, | |
| "grad_norm": 0.42284107643629854, | |
| "learning_rate": 8.477629399091829e-06, | |
| "loss": 0.3207, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.993439550140581, | |
| "grad_norm": 0.3054813070655616, | |
| "learning_rate": 8.461927639326557e-06, | |
| "loss": 0.3167, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.9971883786316776, | |
| "grad_norm": 0.37328989726555284, | |
| "learning_rate": 8.446160024339991e-06, | |
| "loss": 0.3236, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.0007497656982194, | |
| "grad_norm": 0.30788577364316916, | |
| "learning_rate": 8.430326854074787e-06, | |
| "loss": 0.3214, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.0044985941893159, | |
| "grad_norm": 0.35820789954813653, | |
| "learning_rate": 8.41442842972064e-06, | |
| "loss": 0.3168, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.0082474226804123, | |
| "grad_norm": 0.3817197374504937, | |
| "learning_rate": 8.398465053708555e-06, | |
| "loss": 0.3161, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.0119962511715088, | |
| "grad_norm": 0.4062171398157765, | |
| "learning_rate": 8.382437029705096e-06, | |
| "loss": 0.3155, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.0157450796626055, | |
| "grad_norm": 0.3540693852002706, | |
| "learning_rate": 8.3663446626066e-06, | |
| "loss": 0.3144, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.019493908153702, | |
| "grad_norm": 0.4029712327717952, | |
| "learning_rate": 8.350188258533387e-06, | |
| "loss": 0.3162, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.0232427366447985, | |
| "grad_norm": 0.3159333545246974, | |
| "learning_rate": 8.333968124823935e-06, | |
| "loss": 0.3168, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.026991565135895, | |
| "grad_norm": 0.33529821419279554, | |
| "learning_rate": 8.31768457002903e-06, | |
| "loss": 0.3144, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.0307403936269917, | |
| "grad_norm": 0.29642181276226026, | |
| "learning_rate": 8.301337903905895e-06, | |
| "loss": 0.3123, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.0344892221180881, | |
| "grad_norm": 0.3454172596114213, | |
| "learning_rate": 8.28492843741231e-06, | |
| "loss": 0.315, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.0382380506091846, | |
| "grad_norm": 0.34066649262255677, | |
| "learning_rate": 8.26845648270068e-06, | |
| "loss": 0.3142, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.041986879100281, | |
| "grad_norm": 0.43684514447857653, | |
| "learning_rate": 8.251922353112108e-06, | |
| "loss": 0.3146, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.0457357075913778, | |
| "grad_norm": 0.3242821399353603, | |
| "learning_rate": 8.235326363170428e-06, | |
| "loss": 0.313, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.0494845360824743, | |
| "grad_norm": 0.3789281542297383, | |
| "learning_rate": 8.21866882857623e-06, | |
| "loss": 0.31, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.0532333645735708, | |
| "grad_norm": 0.3248576385401445, | |
| "learning_rate": 8.201950066200848e-06, | |
| "loss": 0.311, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.0569821930646672, | |
| "grad_norm": 0.36161121635161686, | |
| "learning_rate": 8.185170394080331e-06, | |
| "loss": 0.3153, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.060731021555764, | |
| "grad_norm": 0.4276056542533018, | |
| "learning_rate": 8.168330131409401e-06, | |
| "loss": 0.3123, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.0644798500468604, | |
| "grad_norm": 0.3567320922642298, | |
| "learning_rate": 8.15142959853537e-06, | |
| "loss": 0.314, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.0682286785379569, | |
| "grad_norm": 0.3412615684823743, | |
| "learning_rate": 8.134469116952058e-06, | |
| "loss": 0.3138, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.0719775070290534, | |
| "grad_norm": 0.3526545824695297, | |
| "learning_rate": 8.117449009293668e-06, | |
| "loss": 0.3137, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.0757263355201498, | |
| "grad_norm": 0.3601042619190153, | |
| "learning_rate": 8.100369599328653e-06, | |
| "loss": 0.3145, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.0794751640112465, | |
| "grad_norm": 0.4413371086788278, | |
| "learning_rate": 8.083231211953556e-06, | |
| "loss": 0.3183, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.083223992502343, | |
| "grad_norm": 0.35663942982642843, | |
| "learning_rate": 8.06603417318683e-06, | |
| "loss": 0.3137, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.0869728209934395, | |
| "grad_norm": 0.30103176112371444, | |
| "learning_rate": 8.048778810162638e-06, | |
| "loss": 0.3158, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.090721649484536, | |
| "grad_norm": 0.5017990471408166, | |
| "learning_rate": 8.031465451124623e-06, | |
| "loss": 0.3104, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.0944704779756327, | |
| "grad_norm": 0.30760999555771873, | |
| "learning_rate": 8.014094425419672e-06, | |
| "loss": 0.3093, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.0982193064667292, | |
| "grad_norm": 0.3744914287772446, | |
| "learning_rate": 7.99666606349165e-06, | |
| "loss": 0.3143, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.1019681349578256, | |
| "grad_norm": 0.321847870026043, | |
| "learning_rate": 7.979180696875107e-06, | |
| "loss": 0.3148, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.1057169634489221, | |
| "grad_norm": 0.34119760664325655, | |
| "learning_rate": 7.961638658188982e-06, | |
| "loss": 0.3156, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.1094657919400188, | |
| "grad_norm": 0.3046924003401573, | |
| "learning_rate": 7.944040281130266e-06, | |
| "loss": 0.3112, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.1132146204311153, | |
| "grad_norm": 0.34431924035163775, | |
| "learning_rate": 7.926385900467656e-06, | |
| "loss": 0.308, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.1169634489222118, | |
| "grad_norm": 0.322861633348364, | |
| "learning_rate": 7.908675852035198e-06, | |
| "loss": 0.3115, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.1207122774133083, | |
| "grad_norm": 0.4011613509203003, | |
| "learning_rate": 7.89091047272588e-06, | |
| "loss": 0.3141, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.124461105904405, | |
| "grad_norm": 0.4644811139880584, | |
| "learning_rate": 7.873090100485235e-06, | |
| "loss": 0.3119, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.1282099343955014, | |
| "grad_norm": 0.3288163794026555, | |
| "learning_rate": 7.855215074304913e-06, | |
| "loss": 0.3115, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.131958762886598, | |
| "grad_norm": 0.3862750722060379, | |
| "learning_rate": 7.837285734216228e-06, | |
| "loss": 0.3108, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.1357075913776944, | |
| "grad_norm": 0.4449512249542337, | |
| "learning_rate": 7.819302421283692e-06, | |
| "loss": 0.3121, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.139456419868791, | |
| "grad_norm": 0.306482330717066, | |
| "learning_rate": 7.801265477598525e-06, | |
| "loss": 0.3155, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.1432052483598876, | |
| "grad_norm": 0.3537936797605087, | |
| "learning_rate": 7.783175246272151e-06, | |
| "loss": 0.3137, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.146954076850984, | |
| "grad_norm": 0.39316069541271836, | |
| "learning_rate": 7.765032071429669e-06, | |
| "loss": 0.3158, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.1507029053420805, | |
| "grad_norm": 0.3496204259814267, | |
| "learning_rate": 7.7468362982033e-06, | |
| "loss": 0.3148, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.1544517338331772, | |
| "grad_norm": 0.34318492953087437, | |
| "learning_rate": 7.728588272725838e-06, | |
| "loss": 0.3143, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.1582005623242737, | |
| "grad_norm": 0.33547854591563525, | |
| "learning_rate": 7.710288342124053e-06, | |
| "loss": 0.3117, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.1619493908153702, | |
| "grad_norm": 0.33238683759141413, | |
| "learning_rate": 7.691936854512089e-06, | |
| "loss": 0.3122, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.1656982193064667, | |
| "grad_norm": 0.3364307963397178, | |
| "learning_rate": 7.673534158984843e-06, | |
| "loss": 0.3118, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.1694470477975631, | |
| "grad_norm": 0.3388871161978866, | |
| "learning_rate": 7.655080605611326e-06, | |
| "loss": 0.3107, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.1731958762886598, | |
| "grad_norm": 0.32235529671903823, | |
| "learning_rate": 7.636576545428006e-06, | |
| "loss": 0.3115, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.1769447047797563, | |
| "grad_norm": 0.41170615872376826, | |
| "learning_rate": 7.618022330432122e-06, | |
| "loss": 0.3112, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.1806935332708528, | |
| "grad_norm": 0.38112866816887303, | |
| "learning_rate": 7.599418313574997e-06, | |
| "loss": 0.3134, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.1844423617619495, | |
| "grad_norm": 0.4177346294198595, | |
| "learning_rate": 7.580764848755315e-06, | |
| "loss": 0.3129, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.188191190253046, | |
| "grad_norm": 0.44163782423530795, | |
| "learning_rate": 7.5620622908124e-06, | |
| "loss": 0.3129, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.1919400187441425, | |
| "grad_norm": 0.32956971930691387, | |
| "learning_rate": 7.543310995519457e-06, | |
| "loss": 0.3116, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.195688847235239, | |
| "grad_norm": 0.3452689739953157, | |
| "learning_rate": 7.524511319576808e-06, | |
| "loss": 0.3118, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.1994376757263354, | |
| "grad_norm": 0.3304621485731618, | |
| "learning_rate": 7.5056636206051014e-06, | |
| "loss": 0.3151, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.2031865042174321, | |
| "grad_norm": 0.32694078694361833, | |
| "learning_rate": 7.486768257138519e-06, | |
| "loss": 0.3126, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.2069353327085286, | |
| "grad_norm": 0.298213189088602, | |
| "learning_rate": 7.4678255886179495e-06, | |
| "loss": 0.3104, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.210684161199625, | |
| "grad_norm": 0.3102729131161918, | |
| "learning_rate": 7.44883597538415e-06, | |
| "loss": 0.3119, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.2144329896907216, | |
| "grad_norm": 0.3615016329359893, | |
| "learning_rate": 7.429799778670892e-06, | |
| "loss": 0.312, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.2181818181818183, | |
| "grad_norm": 0.27320951055269593, | |
| "learning_rate": 7.410717360598091e-06, | |
| "loss": 0.3122, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.2219306466729147, | |
| "grad_norm": 0.32659263816751305, | |
| "learning_rate": 7.3915890841649185e-06, | |
| "loss": 0.3091, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.2256794751640112, | |
| "grad_norm": 0.3334234925194134, | |
| "learning_rate": 7.3724153132429e-06, | |
| "loss": 0.3098, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.2294283036551077, | |
| "grad_norm": 0.2954664212502977, | |
| "learning_rate": 7.353196412568981e-06, | |
| "loss": 0.3109, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.2331771321462044, | |
| "grad_norm": 0.3625209301919828, | |
| "learning_rate": 7.333932747738604e-06, | |
| "loss": 0.3111, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.2369259606373009, | |
| "grad_norm": 0.3777090990507428, | |
| "learning_rate": 7.314624685198739e-06, | |
| "loss": 0.3123, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.2406747891283973, | |
| "grad_norm": 0.3213218051422179, | |
| "learning_rate": 7.295272592240931e-06, | |
| "loss": 0.3135, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.2444236176194938, | |
| "grad_norm": 0.3517340125442994, | |
| "learning_rate": 7.275876836994293e-06, | |
| "loss": 0.3098, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.2481724461105905, | |
| "grad_norm": 0.2958254932033676, | |
| "learning_rate": 7.256437788418518e-06, | |
| "loss": 0.3117, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.251921274601687, | |
| "grad_norm": 0.36561257727299235, | |
| "learning_rate": 7.236955816296853e-06, | |
| "loss": 0.3122, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.2556701030927835, | |
| "grad_norm": 0.3058483367622925, | |
| "learning_rate": 7.217431291229068e-06, | |
| "loss": 0.312, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.25941893158388, | |
| "grad_norm": 0.360042705506358, | |
| "learning_rate": 7.197864584624404e-06, | |
| "loss": 0.3098, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.2631677600749764, | |
| "grad_norm": 0.33352021186668945, | |
| "learning_rate": 7.178256068694511e-06, | |
| "loss": 0.3103, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.2669165885660731, | |
| "grad_norm": 0.35734952787552315, | |
| "learning_rate": 7.158606116446364e-06, | |
| "loss": 0.3102, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.2706654170571696, | |
| "grad_norm": 0.39449641298830695, | |
| "learning_rate": 7.138915101675165e-06, | |
| "loss": 0.3102, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.274414245548266, | |
| "grad_norm": 0.3714471907535279, | |
| "learning_rate": 7.1191833989572435e-06, | |
| "loss": 0.3123, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.2781630740393628, | |
| "grad_norm": 0.3042416177198767, | |
| "learning_rate": 7.099411383642918e-06, | |
| "loss": 0.312, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.2819119025304593, | |
| "grad_norm": 0.32047368301258783, | |
| "learning_rate": 7.079599431849364e-06, | |
| "loss": 0.3121, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.2856607310215558, | |
| "grad_norm": 0.41019651538058094, | |
| "learning_rate": 7.059747920453458e-06, | |
| "loss": 0.3103, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.2894095595126522, | |
| "grad_norm": 0.3551928742197105, | |
| "learning_rate": 7.0398572270846034e-06, | |
| "loss": 0.3111, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.2931583880037487, | |
| "grad_norm": 0.3565239012658027, | |
| "learning_rate": 7.019927730117553e-06, | |
| "loss": 0.3108, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.2969072164948454, | |
| "grad_norm": 0.40917892127860045, | |
| "learning_rate": 6.999959808665208e-06, | |
| "loss": 0.3095, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.300656044985942, | |
| "grad_norm": 0.3769832501031786, | |
| "learning_rate": 6.979953842571409e-06, | |
| "loss": 0.3091, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.3044048734770384, | |
| "grad_norm": 0.370975430040192, | |
| "learning_rate": 6.959910212403708e-06, | |
| "loss": 0.3081, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.308153701968135, | |
| "grad_norm": 0.33276491617365694, | |
| "learning_rate": 6.939829299446127e-06, | |
| "loss": 0.3099, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.3119025304592316, | |
| "grad_norm": 0.37547647443532434, | |
| "learning_rate": 6.919711485691909e-06, | |
| "loss": 0.3101, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.315651358950328, | |
| "grad_norm": 0.38523049314373486, | |
| "learning_rate": 6.899557153836252e-06, | |
| "loss": 0.3104, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.3194001874414245, | |
| "grad_norm": 0.32838375873093845, | |
| "learning_rate": 6.8793666872690224e-06, | |
| "loss": 0.3095, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.323149015932521, | |
| "grad_norm": 0.3183180402450787, | |
| "learning_rate": 6.859140470067471e-06, | |
| "loss": 0.3096, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.3268978444236177, | |
| "grad_norm": 0.3629948359100712, | |
| "learning_rate": 6.838878886988921e-06, | |
| "loss": 0.3121, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.3306466729147142, | |
| "grad_norm": 0.3533555709563143, | |
| "learning_rate": 6.818582323463447e-06, | |
| "loss": 0.3123, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.3343955014058106, | |
| "grad_norm": 0.33208001817653265, | |
| "learning_rate": 6.798251165586554e-06, | |
| "loss": 0.3049, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.3381443298969073, | |
| "grad_norm": 0.3002792502712748, | |
| "learning_rate": 6.777885800111814e-06, | |
| "loss": 0.3142, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.3418931583880038, | |
| "grad_norm": 0.35269336353853314, | |
| "learning_rate": 6.757486614443528e-06, | |
| "loss": 0.3098, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.3456419868791003, | |
| "grad_norm": 0.3461464681174475, | |
| "learning_rate": 6.737053996629349e-06, | |
| "loss": 0.3127, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.3493908153701968, | |
| "grad_norm": 0.3240065140465353, | |
| "learning_rate": 6.716588335352894e-06, | |
| "loss": 0.3112, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.3531396438612933, | |
| "grad_norm": 0.3906540626649883, | |
| "learning_rate": 6.69609001992636e-06, | |
| "loss": 0.3067, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.3568884723523897, | |
| "grad_norm": 0.2896675578566779, | |
| "learning_rate": 6.675559440283115e-06, | |
| "loss": 0.3088, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.3606373008434864, | |
| "grad_norm": 0.37220517825735044, | |
| "learning_rate": 6.654996986970277e-06, | |
| "loss": 0.3116, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.364386129334583, | |
| "grad_norm": 0.36451423055829396, | |
| "learning_rate": 6.634403051141287e-06, | |
| "loss": 0.3096, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.3681349578256794, | |
| "grad_norm": 0.33555700523774556, | |
| "learning_rate": 6.613778024548471e-06, | |
| "loss": 0.3059, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.371883786316776, | |
| "grad_norm": 0.3589735145982526, | |
| "learning_rate": 6.593122299535583e-06, | |
| "loss": 0.3082, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.3756326148078726, | |
| "grad_norm": 0.3535758021471356, | |
| "learning_rate": 6.572436269030349e-06, | |
| "loss": 0.3056, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.379381443298969, | |
| "grad_norm": 0.3139302849140221, | |
| "learning_rate": 6.55172032653698e-06, | |
| "loss": 0.3071, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.3831302717900655, | |
| "grad_norm": 0.30876175987731125, | |
| "learning_rate": 6.530974866128699e-06, | |
| "loss": 0.3119, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.386879100281162, | |
| "grad_norm": 0.29612839549415565, | |
| "learning_rate": 6.510200282440235e-06, | |
| "loss": 0.3089, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.3906279287722587, | |
| "grad_norm": 0.367321799885979, | |
| "learning_rate": 6.489396970660327e-06, | |
| "loss": 0.3101, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.3943767572633552, | |
| "grad_norm": 0.3185186823770553, | |
| "learning_rate": 6.4685653265241965e-06, | |
| "loss": 0.3111, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.3981255857544517, | |
| "grad_norm": 0.3767241891962025, | |
| "learning_rate": 6.447705746306022e-06, | |
| "loss": 0.3094, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.4018744142455484, | |
| "grad_norm": 0.33019970567321594, | |
| "learning_rate": 6.426818626811402e-06, | |
| "loss": 0.3132, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.4056232427366449, | |
| "grad_norm": 0.32094766671797814, | |
| "learning_rate": 6.405904365369807e-06, | |
| "loss": 0.3088, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.4093720712277413, | |
| "grad_norm": 0.2826846139255292, | |
| "learning_rate": 6.384963359827023e-06, | |
| "loss": 0.3081, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.4131208997188378, | |
| "grad_norm": 0.32741886750116905, | |
| "learning_rate": 6.3639960085375765e-06, | |
| "loss": 0.3053, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.4168697282099343, | |
| "grad_norm": 0.317490371811339, | |
| "learning_rate": 6.343002710357164e-06, | |
| "loss": 0.3074, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.420618556701031, | |
| "grad_norm": 0.41306311997301026, | |
| "learning_rate": 6.321983864635064e-06, | |
| "loss": 0.3057, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.4243673851921275, | |
| "grad_norm": 0.38554380688703854, | |
| "learning_rate": 6.300939871206534e-06, | |
| "loss": 0.3105, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.428116213683224, | |
| "grad_norm": 0.32564252337266814, | |
| "learning_rate": 6.279871130385212e-06, | |
| "loss": 0.3072, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.4318650421743206, | |
| "grad_norm": 0.2827430544551273, | |
| "learning_rate": 6.258778042955498e-06, | |
| "loss": 0.3086, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.4356138706654171, | |
| "grad_norm": 0.29456325510951775, | |
| "learning_rate": 6.2376610101649286e-06, | |
| "loss": 0.3037, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.4393626991565136, | |
| "grad_norm": 0.3965462936666553, | |
| "learning_rate": 6.216520433716544e-06, | |
| "loss": 0.3078, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.44311152764761, | |
| "grad_norm": 0.35168327410996947, | |
| "learning_rate": 6.1953567157612546e-06, | |
| "loss": 0.304, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.4468603561387066, | |
| "grad_norm": 0.28589685002558846, | |
| "learning_rate": 6.174170258890183e-06, | |
| "loss": 0.3084, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.4506091846298033, | |
| "grad_norm": 0.3031381651642691, | |
| "learning_rate": 6.152961466127003e-06, | |
| "loss": 0.3082, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.4543580131208997, | |
| "grad_norm": 0.2921304333612235, | |
| "learning_rate": 6.131730740920281e-06, | |
| "loss": 0.3082, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.4581068416119962, | |
| "grad_norm": 0.30283235717875956, | |
| "learning_rate": 6.110478487135798e-06, | |
| "loss": 0.3061, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.461855670103093, | |
| "grad_norm": 0.332497742843437, | |
| "learning_rate": 6.0892051090488635e-06, | |
| "loss": 0.3084, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.4656044985941894, | |
| "grad_norm": 0.41697991725406525, | |
| "learning_rate": 6.067911011336631e-06, | |
| "loss": 0.3071, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.4693533270852859, | |
| "grad_norm": 0.4051234101656245, | |
| "learning_rate": 6.046596599070401e-06, | |
| "loss": 0.3104, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.4731021555763824, | |
| "grad_norm": 0.3164549035040791, | |
| "learning_rate": 6.0252622777079035e-06, | |
| "loss": 0.3099, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.4768509840674788, | |
| "grad_norm": 0.31241166834227563, | |
| "learning_rate": 6.003908453085601e-06, | |
| "loss": 0.311, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.4805998125585753, | |
| "grad_norm": 0.32222188905870913, | |
| "learning_rate": 5.9825355314109526e-06, | |
| "loss": 0.3072, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.484348641049672, | |
| "grad_norm": 0.3227944538376381, | |
| "learning_rate": 5.961143919254703e-06, | |
| "loss": 0.308, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.4880974695407685, | |
| "grad_norm": 0.3300037675651502, | |
| "learning_rate": 5.939734023543136e-06, | |
| "loss": 0.3046, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.491846298031865, | |
| "grad_norm": 0.30075953903254604, | |
| "learning_rate": 5.918306251550339e-06, | |
| "loss": 0.302, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.4955951265229617, | |
| "grad_norm": 0.31317933873421516, | |
| "learning_rate": 5.8968610108904544e-06, | |
| "loss": 0.3067, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.4993439550140581, | |
| "grad_norm": 0.29399450034399577, | |
| "learning_rate": 5.8753987095099265e-06, | |
| "loss": 0.3083, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.5030927835051546, | |
| "grad_norm": 0.3323948341742506, | |
| "learning_rate": 5.85391975567974e-06, | |
| "loss": 0.3062, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.506841611996251, | |
| "grad_norm": 0.3084482430218866, | |
| "learning_rate": 5.832424557987656e-06, | |
| "loss": 0.3099, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.5105904404873476, | |
| "grad_norm": 0.3207010067585515, | |
| "learning_rate": 5.810913525330431e-06, | |
| "loss": 0.3052, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.5143392689784443, | |
| "grad_norm": 0.31177483150319013, | |
| "learning_rate": 5.789387066906058e-06, | |
| "loss": 0.3075, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.5180880974695408, | |
| "grad_norm": 0.29934212917396297, | |
| "learning_rate": 5.7678455922059555e-06, | |
| "loss": 0.3052, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.5218369259606375, | |
| "grad_norm": 0.34568865059481235, | |
| "learning_rate": 5.746289511007203e-06, | |
| "loss": 0.309, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.525585754451734, | |
| "grad_norm": 0.3456913152992093, | |
| "learning_rate": 5.724719233364731e-06, | |
| "loss": 0.311, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.5293345829428304, | |
| "grad_norm": 0.27251969528129666, | |
| "learning_rate": 5.703135169603522e-06, | |
| "loss": 0.3062, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.533083411433927, | |
| "grad_norm": 0.2690943892304097, | |
| "learning_rate": 5.681537730310811e-06, | |
| "loss": 0.3074, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.5368322399250234, | |
| "grad_norm": 0.278333988084598, | |
| "learning_rate": 5.659927326328272e-06, | |
| "loss": 0.3068, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.5405810684161199, | |
| "grad_norm": 0.3244786550521004, | |
| "learning_rate": 5.6383043687442045e-06, | |
| "loss": 0.3056, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.5443298969072163, | |
| "grad_norm": 0.2872353797592551, | |
| "learning_rate": 5.616669268885704e-06, | |
| "loss": 0.3106, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.548078725398313, | |
| "grad_norm": 0.2849148407943691, | |
| "learning_rate": 5.595022438310853e-06, | |
| "loss": 0.3048, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.5518275538894095, | |
| "grad_norm": 0.2796372211580368, | |
| "learning_rate": 5.573364288800879e-06, | |
| "loss": 0.3053, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.5555763823805062, | |
| "grad_norm": 0.30531840338480354, | |
| "learning_rate": 5.551695232352325e-06, | |
| "loss": 0.3087, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.5593252108716027, | |
| "grad_norm": 0.4238218939540311, | |
| "learning_rate": 5.530015681169221e-06, | |
| "loss": 0.3055, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.5630740393626992, | |
| "grad_norm": 0.3815305304277874, | |
| "learning_rate": 5.508326047655228e-06, | |
| "loss": 0.3057, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.5668228678537957, | |
| "grad_norm": 0.3138756321656075, | |
| "learning_rate": 5.486626744405803e-06, | |
| "loss": 0.3041, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.5705716963448921, | |
| "grad_norm": 0.33872416612121964, | |
| "learning_rate": 5.464918184200346e-06, | |
| "loss": 0.3051, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.5743205248359886, | |
| "grad_norm": 0.28251389314936254, | |
| "learning_rate": 5.443200779994352e-06, | |
| "loss": 0.3056, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.5780693533270853, | |
| "grad_norm": 0.30129264913596, | |
| "learning_rate": 5.42147494491155e-06, | |
| "loss": 0.3093, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.5818181818181818, | |
| "grad_norm": 0.3521670880883061, | |
| "learning_rate": 5.399741092236048e-06, | |
| "loss": 0.3048, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.5855670103092785, | |
| "grad_norm": 0.3228669758788861, | |
| "learning_rate": 5.377999635404471e-06, | |
| "loss": 0.3053, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.589315838800375, | |
| "grad_norm": 0.310469703159362, | |
| "learning_rate": 5.356250987998096e-06, | |
| "loss": 0.305, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.5930646672914714, | |
| "grad_norm": 0.30212536889324887, | |
| "learning_rate": 5.334495563734982e-06, | |
| "loss": 0.3053, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.596813495782568, | |
| "grad_norm": 0.286958233099421, | |
| "learning_rate": 5.312733776462104e-06, | |
| "loss": 0.3076, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.6005623242736644, | |
| "grad_norm": 0.28804728629879095, | |
| "learning_rate": 5.290966040147478e-06, | |
| "loss": 0.3043, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.6043111527647609, | |
| "grad_norm": 0.30051995708100954, | |
| "learning_rate": 5.269192768872287e-06, | |
| "loss": 0.3052, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.6080599812558576, | |
| "grad_norm": 0.28056826093377685, | |
| "learning_rate": 5.247414376823002e-06, | |
| "loss": 0.307, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.611808809746954, | |
| "grad_norm": 0.2917800338604193, | |
| "learning_rate": 5.225631278283509e-06, | |
| "loss": 0.3081, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.6155576382380508, | |
| "grad_norm": 0.32427902637956957, | |
| "learning_rate": 5.203843887627223e-06, | |
| "loss": 0.3056, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.6193064667291472, | |
| "grad_norm": 0.3284938009296222, | |
| "learning_rate": 5.1820526193092035e-06, | |
| "loss": 0.3, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.6230552952202437, | |
| "grad_norm": 0.2994176202208045, | |
| "learning_rate": 5.160257887858278e-06, | |
| "loss": 0.3055, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.6268041237113402, | |
| "grad_norm": 0.30335734202532266, | |
| "learning_rate": 5.138460107869144e-06, | |
| "loss": 0.3063, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.6305529522024367, | |
| "grad_norm": 0.2981285125458323, | |
| "learning_rate": 5.116659693994502e-06, | |
| "loss": 0.3075, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.6343017806935332, | |
| "grad_norm": 0.31311406252278756, | |
| "learning_rate": 5.09485706093715e-06, | |
| "loss": 0.305, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.6380506091846299, | |
| "grad_norm": 0.34474359679529554, | |
| "learning_rate": 5.073052623442102e-06, | |
| "loss": 0.3058, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.6417994376757263, | |
| "grad_norm": 0.28599712118797227, | |
| "learning_rate": 5.0512467962886925e-06, | |
| "loss": 0.3061, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.6455482661668228, | |
| "grad_norm": 0.2943654382080562, | |
| "learning_rate": 5.029439994282698e-06, | |
| "loss": 0.3066, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.6492970946579195, | |
| "grad_norm": 0.29496328633119784, | |
| "learning_rate": 5.007632632248435e-06, | |
| "loss": 0.3041, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.653045923149016, | |
| "grad_norm": 0.3047275804837925, | |
| "learning_rate": 4.985825125020875e-06, | |
| "loss": 0.3057, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.6567947516401125, | |
| "grad_norm": 0.3144745132241415, | |
| "learning_rate": 4.9640178874377555e-06, | |
| "loss": 0.3076, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.660543580131209, | |
| "grad_norm": 0.3080039277090962, | |
| "learning_rate": 4.942211334331673e-06, | |
| "loss": 0.3042, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.6642924086223054, | |
| "grad_norm": 0.2933857618191826, | |
| "learning_rate": 4.920405880522216e-06, | |
| "loss": 0.3013, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.668041237113402, | |
| "grad_norm": 0.2654508570280346, | |
| "learning_rate": 4.898601940808054e-06, | |
| "loss": 0.3061, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.6717900656044986, | |
| "grad_norm": 0.2612577806304869, | |
| "learning_rate": 4.876799929959056e-06, | |
| "loss": 0.3051, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.675538894095595, | |
| "grad_norm": 0.2941200079354783, | |
| "learning_rate": 4.855000262708403e-06, | |
| "loss": 0.3058, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.6792877225866918, | |
| "grad_norm": 0.292477790531897, | |
| "learning_rate": 4.833203353744685e-06, | |
| "loss": 0.3003, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.6830365510777883, | |
| "grad_norm": 0.3182850195508283, | |
| "learning_rate": 4.811409617704031e-06, | |
| "loss": 0.3055, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.6867853795688847, | |
| "grad_norm": 0.295963046759259, | |
| "learning_rate": 4.789619469162207e-06, | |
| "loss": 0.3038, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.6905342080599812, | |
| "grad_norm": 0.30453062664454494, | |
| "learning_rate": 4.767833322626739e-06, | |
| "loss": 0.3077, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.6942830365510777, | |
| "grad_norm": 0.32102961685654363, | |
| "learning_rate": 4.746051592529024e-06, | |
| "loss": 0.3045, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.6980318650421742, | |
| "grad_norm": 0.3860426253093947, | |
| "learning_rate": 4.72427469321644e-06, | |
| "loss": 0.304, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.7017806935332709, | |
| "grad_norm": 0.31976534291795883, | |
| "learning_rate": 4.702503038944477e-06, | |
| "loss": 0.3065, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.7055295220243674, | |
| "grad_norm": 0.30020103673628445, | |
| "learning_rate": 4.680737043868847e-06, | |
| "loss": 0.3051, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.709278350515464, | |
| "grad_norm": 0.3397891462095839, | |
| "learning_rate": 4.658977122037613e-06, | |
| "loss": 0.304, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.7130271790065605, | |
| "grad_norm": 0.27912887922833707, | |
| "learning_rate": 4.637223687383301e-06, | |
| "loss": 0.3037, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.716776007497657, | |
| "grad_norm": 0.3191792078436299, | |
| "learning_rate": 4.6154771537150395e-06, | |
| "loss": 0.3045, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.7205248359887535, | |
| "grad_norm": 0.33133767848366, | |
| "learning_rate": 4.593737934710682e-06, | |
| "loss": 0.3054, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.72427366447985, | |
| "grad_norm": 0.3304449062895558, | |
| "learning_rate": 4.572006443908931e-06, | |
| "loss": 0.3074, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.7280224929709465, | |
| "grad_norm": 0.31412432430168175, | |
| "learning_rate": 4.550283094701486e-06, | |
| "loss": 0.3014, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.7317713214620432, | |
| "grad_norm": 0.3205845577239349, | |
| "learning_rate": 4.528568300325163e-06, | |
| "loss": 0.3048, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.7355201499531396, | |
| "grad_norm": 0.2978584746193892, | |
| "learning_rate": 4.506862473854051e-06, | |
| "loss": 0.3073, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.7392689784442363, | |
| "grad_norm": 0.31780026167650316, | |
| "learning_rate": 4.485166028191635e-06, | |
| "loss": 0.3042, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.7430178069353328, | |
| "grad_norm": 0.29696017195351765, | |
| "learning_rate": 4.46347937606296e-06, | |
| "loss": 0.3033, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.7467666354264293, | |
| "grad_norm": 0.3034994709800072, | |
| "learning_rate": 4.441802930006769e-06, | |
| "loss": 0.3019, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.7505154639175258, | |
| "grad_norm": 0.27513107950398713, | |
| "learning_rate": 4.420137102367655e-06, | |
| "loss": 0.3013, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.7542642924086223, | |
| "grad_norm": 0.27992210471747286, | |
| "learning_rate": 4.3984823052882275e-06, | |
| "loss": 0.3078, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.7580131208997187, | |
| "grad_norm": 0.26668301924819904, | |
| "learning_rate": 4.376838950701253e-06, | |
| "loss": 0.3048, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.7617619493908152, | |
| "grad_norm": 0.366231766614244, | |
| "learning_rate": 4.355207450321843e-06, | |
| "loss": 0.3062, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.765510777881912, | |
| "grad_norm": 0.3017858158791989, | |
| "learning_rate": 4.333588215639602e-06, | |
| "loss": 0.3075, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.7692596063730084, | |
| "grad_norm": 0.346236001649392, | |
| "learning_rate": 4.3119816579108105e-06, | |
| "loss": 0.3051, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.773008434864105, | |
| "grad_norm": 0.2643095034172052, | |
| "learning_rate": 4.290388188150602e-06, | |
| "loss": 0.3076, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.7767572633552016, | |
| "grad_norm": 0.29431607280457334, | |
| "learning_rate": 4.268808217125135e-06, | |
| "loss": 0.2987, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.780506091846298, | |
| "grad_norm": 0.2876827526865779, | |
| "learning_rate": 4.247242155343791e-06, | |
| "loss": 0.3025, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.7842549203373945, | |
| "grad_norm": 0.3017890755638588, | |
| "learning_rate": 4.225690413051357e-06, | |
| "loss": 0.3047, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.788003748828491, | |
| "grad_norm": 0.2569896947025125, | |
| "learning_rate": 4.204153400220226e-06, | |
| "loss": 0.3018, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.7917525773195875, | |
| "grad_norm": 0.3176997642563989, | |
| "learning_rate": 4.1826315265426e-06, | |
| "loss": 0.3029, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.7955014058106842, | |
| "grad_norm": 0.26253963065958663, | |
| "learning_rate": 4.161125201422685e-06, | |
| "loss": 0.3056, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.7992502343017807, | |
| "grad_norm": 0.2999248709623152, | |
| "learning_rate": 4.139634833968918e-06, | |
| "loss": 0.3034, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.8029990627928774, | |
| "grad_norm": 0.2625190575233833, | |
| "learning_rate": 4.118160832986178e-06, | |
| "loss": 0.3066, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.8067478912839738, | |
| "grad_norm": 0.28396845470603027, | |
| "learning_rate": 4.096703606968007e-06, | |
| "loss": 0.2975, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.8104967197750703, | |
| "grad_norm": 0.28114557639064414, | |
| "learning_rate": 4.075263564088841e-06, | |
| "loss": 0.3057, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.8142455482661668, | |
| "grad_norm": 0.28640490820378106, | |
| "learning_rate": 4.05384111219625e-06, | |
| "loss": 0.304, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.8179943767572633, | |
| "grad_norm": 0.3121384034406098, | |
| "learning_rate": 4.032436658803175e-06, | |
| "loss": 0.3018, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.8217432052483598, | |
| "grad_norm": 0.3535199694389629, | |
| "learning_rate": 4.011050611080173e-06, | |
| "loss": 0.3016, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.8254920337394565, | |
| "grad_norm": 0.2763793407172489, | |
| "learning_rate": 3.989683375847681e-06, | |
| "loss": 0.305, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.829240862230553, | |
| "grad_norm": 0.2650103173219085, | |
| "learning_rate": 3.968335359568267e-06, | |
| "loss": 0.3053, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.8329896907216496, | |
| "grad_norm": 0.2963214885097785, | |
| "learning_rate": 3.947006968338904e-06, | |
| "loss": 0.3026, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.8367385192127461, | |
| "grad_norm": 0.2628968628669574, | |
| "learning_rate": 3.9256986078832445e-06, | |
| "loss": 0.3013, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.8404873477038426, | |
| "grad_norm": 0.2681562658141619, | |
| "learning_rate": 3.9044106835439e-06, | |
| "loss": 0.3043, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.844236176194939, | |
| "grad_norm": 0.2778911339445839, | |
| "learning_rate": 3.883143600274737e-06, | |
| "loss": 0.3065, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.8479850046860355, | |
| "grad_norm": 0.31108432801110153, | |
| "learning_rate": 3.861897762633158e-06, | |
| "loss": 0.3047, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.851733833177132, | |
| "grad_norm": 0.3091817436636937, | |
| "learning_rate": 3.840673574772427e-06, | |
| "loss": 0.3016, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.8554826616682287, | |
| "grad_norm": 0.2779952989085048, | |
| "learning_rate": 3.819471440433963e-06, | |
| "loss": 0.3059, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.8592314901593252, | |
| "grad_norm": 0.30082155927958976, | |
| "learning_rate": 3.798291762939672e-06, | |
| "loss": 0.3037, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.862980318650422, | |
| "grad_norm": 0.2714535755041495, | |
| "learning_rate": 3.7771349451842706e-06, | |
| "loss": 0.3024, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.8667291471415184, | |
| "grad_norm": 0.2646413815703557, | |
| "learning_rate": 3.7560013896276154e-06, | |
| "loss": 0.3062, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.8704779756326149, | |
| "grad_norm": 0.27989036154054725, | |
| "learning_rate": 3.7348914982870598e-06, | |
| "loss": 0.3014, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.8742268041237113, | |
| "grad_norm": 0.249918156959456, | |
| "learning_rate": 3.7138056727297966e-06, | |
| "loss": 0.3066, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.8779756326148078, | |
| "grad_norm": 0.2756181408313783, | |
| "learning_rate": 3.6927443140652243e-06, | |
| "loss": 0.2989, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.8817244611059043, | |
| "grad_norm": 0.28120277663989135, | |
| "learning_rate": 3.6717078229373094e-06, | |
| "loss": 0.3058, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 1.8854732895970008, | |
| "grad_norm": 0.30112216057324276, | |
| "learning_rate": 3.6506965995169778e-06, | |
| "loss": 0.302, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 1.8892221180880975, | |
| "grad_norm": 0.25724383909329807, | |
| "learning_rate": 3.6297110434944937e-06, | |
| "loss": 0.3009, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.892970946579194, | |
| "grad_norm": 0.25981738649741903, | |
| "learning_rate": 3.6087515540718533e-06, | |
| "loss": 0.3055, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.8967197750702907, | |
| "grad_norm": 0.25652902199859806, | |
| "learning_rate": 3.587818529955203e-06, | |
| "loss": 0.3025, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 1.9004686035613871, | |
| "grad_norm": 0.2673452955037198, | |
| "learning_rate": 3.5669123693472386e-06, | |
| "loss": 0.3048, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.9042174320524836, | |
| "grad_norm": 0.26507376497776003, | |
| "learning_rate": 3.5460334699396486e-06, | |
| "loss": 0.299, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 1.90796626054358, | |
| "grad_norm": 0.28370264506275916, | |
| "learning_rate": 3.525182228905532e-06, | |
| "loss": 0.3026, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 1.9117150890346766, | |
| "grad_norm": 0.2712966167311274, | |
| "learning_rate": 3.5043590428918543e-06, | |
| "loss": 0.3016, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.915463917525773, | |
| "grad_norm": 0.2553393508783198, | |
| "learning_rate": 3.4835643080119035e-06, | |
| "loss": 0.3007, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 1.9192127460168698, | |
| "grad_norm": 0.27034735418826117, | |
| "learning_rate": 3.4627984198377397e-06, | |
| "loss": 0.3007, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.9229615745079662, | |
| "grad_norm": 0.30646341895729456, | |
| "learning_rate": 3.4420617733926897e-06, | |
| "loss": 0.3, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.926710402999063, | |
| "grad_norm": 0.2512695351382934, | |
| "learning_rate": 3.421354763143817e-06, | |
| "loss": 0.306, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.9304592314901594, | |
| "grad_norm": 0.295107786520028, | |
| "learning_rate": 3.40067778299443e-06, | |
| "loss": 0.3018, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.934208059981256, | |
| "grad_norm": 0.26954378841421245, | |
| "learning_rate": 3.380031226276579e-06, | |
| "loss": 0.2959, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.9379568884723524, | |
| "grad_norm": 0.2817857686250554, | |
| "learning_rate": 3.3594154857435824e-06, | |
| "loss": 0.2995, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.9417057169634488, | |
| "grad_norm": 0.26193810687692165, | |
| "learning_rate": 3.33883095356255e-06, | |
| "loss": 0.3015, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.9454545454545453, | |
| "grad_norm": 0.24259516878097934, | |
| "learning_rate": 3.318278021306921e-06, | |
| "loss": 0.3054, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.949203373945642, | |
| "grad_norm": 0.25065102402789546, | |
| "learning_rate": 3.297757079949024e-06, | |
| "loss": 0.3009, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.9529522024367385, | |
| "grad_norm": 0.2983273686405371, | |
| "learning_rate": 3.27726851985263e-06, | |
| "loss": 0.3014, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 1.9567010309278352, | |
| "grad_norm": 0.2822433040270773, | |
| "learning_rate": 3.2568127307655332e-06, | |
| "loss": 0.3047, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.9604498594189317, | |
| "grad_norm": 0.2921957259400507, | |
| "learning_rate": 3.236390101812137e-06, | |
| "loss": 0.3045, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 1.9641986879100282, | |
| "grad_norm": 0.3033695868527522, | |
| "learning_rate": 3.2160010214860415e-06, | |
| "loss": 0.3, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 1.9679475164011246, | |
| "grad_norm": 0.3020931816317373, | |
| "learning_rate": 3.1956458776426704e-06, | |
| "loss": 0.2982, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.9716963448922211, | |
| "grad_norm": 0.3154539666343415, | |
| "learning_rate": 3.1753250574918755e-06, | |
| "loss": 0.3045, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 1.9754451733833176, | |
| "grad_norm": 0.3079829291596006, | |
| "learning_rate": 3.1550389475905884e-06, | |
| "loss": 0.3033, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 1.9791940018744143, | |
| "grad_norm": 0.2795592575657611, | |
| "learning_rate": 3.1347879338354474e-06, | |
| "loss": 0.3013, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.9829428303655108, | |
| "grad_norm": 0.28905850248906373, | |
| "learning_rate": 3.114572401455476e-06, | |
| "loss": 0.3018, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 1.9866916588566073, | |
| "grad_norm": 0.2917801204906715, | |
| "learning_rate": 3.094392735004742e-06, | |
| "loss": 0.3077, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.990440487347704, | |
| "grad_norm": 0.2869880836395518, | |
| "learning_rate": 3.074249318355046e-06, | |
| "loss": 0.305, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.9941893158388004, | |
| "grad_norm": 0.266193620073341, | |
| "learning_rate": 3.0541425346886234e-06, | |
| "loss": 0.3042, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 1.997938144329897, | |
| "grad_norm": 0.24366671055144762, | |
| "learning_rate": 3.0340727664908437e-06, | |
| "loss": 0.3031, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.0014995313964388, | |
| "grad_norm": 0.27572825512606464, | |
| "learning_rate": 3.0140403955429498e-06, | |
| "loss": 0.2986, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.0052483598875352, | |
| "grad_norm": 0.27324353099912724, | |
| "learning_rate": 2.9940458029147833e-06, | |
| "loss": 0.2982, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.0089971883786317, | |
| "grad_norm": 0.2737011310465298, | |
| "learning_rate": 2.974089368957542e-06, | |
| "loss": 0.2987, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.012746016869728, | |
| "grad_norm": 0.24331428459490873, | |
| "learning_rate": 2.954171473296543e-06, | |
| "loss": 0.299, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.0164948453608247, | |
| "grad_norm": 0.26924383948866965, | |
| "learning_rate": 2.934292494823997e-06, | |
| "loss": 0.2999, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.020243673851921, | |
| "grad_norm": 0.26880279888098063, | |
| "learning_rate": 2.9144528116918114e-06, | |
| "loss": 0.2982, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.0239925023430176, | |
| "grad_norm": 0.26881181242331237, | |
| "learning_rate": 2.894652801304382e-06, | |
| "loss": 0.2985, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.027741330834114, | |
| "grad_norm": 0.2759806088124146, | |
| "learning_rate": 2.8748928403114274e-06, | |
| "loss": 0.2983, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.031490159325211, | |
| "grad_norm": 0.25861803616143986, | |
| "learning_rate": 2.855173304600817e-06, | |
| "loss": 0.299, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.0352389878163075, | |
| "grad_norm": 0.2522194380092259, | |
| "learning_rate": 2.835494569291423e-06, | |
| "loss": 0.3015, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.038987816307404, | |
| "grad_norm": 0.28629406566171184, | |
| "learning_rate": 2.8158570087259825e-06, | |
| "loss": 0.2968, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.0427366447985005, | |
| "grad_norm": 0.25257171184095567, | |
| "learning_rate": 2.796260996463975e-06, | |
| "loss": 0.2972, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.046485473289597, | |
| "grad_norm": 0.292661843719565, | |
| "learning_rate": 2.7767069052745267e-06, | |
| "loss": 0.2982, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.0502343017806934, | |
| "grad_norm": 0.29909777284030853, | |
| "learning_rate": 2.7571951071293015e-06, | |
| "loss": 0.2961, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.05398313027179, | |
| "grad_norm": 0.27936272946203394, | |
| "learning_rate": 2.737725973195442e-06, | |
| "loss": 0.3, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.0577319587628864, | |
| "grad_norm": 0.2722976980702845, | |
| "learning_rate": 2.718299873828505e-06, | |
| "loss": 0.2995, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.0614807872539833, | |
| "grad_norm": 0.2556623687563956, | |
| "learning_rate": 2.698917178565403e-06, | |
| "loss": 0.3004, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.06522961574508, | |
| "grad_norm": 0.24143453577190127, | |
| "learning_rate": 2.6795782561173946e-06, | |
| "loss": 0.3012, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.0689784442361763, | |
| "grad_norm": 0.2554088706670706, | |
| "learning_rate": 2.6602834743630567e-06, | |
| "loss": 0.3007, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.0727272727272728, | |
| "grad_norm": 0.24613264972717522, | |
| "learning_rate": 2.6410332003412953e-06, | |
| "loss": 0.2997, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.0764761012183692, | |
| "grad_norm": 0.2851526682441657, | |
| "learning_rate": 2.6218278002443513e-06, | |
| "loss": 0.2985, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.0802249297094657, | |
| "grad_norm": 0.2672191793824331, | |
| "learning_rate": 2.602667639410849e-06, | |
| "loss": 0.2998, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.083973758200562, | |
| "grad_norm": 0.26199548768893466, | |
| "learning_rate": 2.5835530823188393e-06, | |
| "loss": 0.2977, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.0877225866916587, | |
| "grad_norm": 0.26975447619905446, | |
| "learning_rate": 2.5644844925788605e-06, | |
| "loss": 0.2979, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.0914714151827556, | |
| "grad_norm": 0.25886114170392355, | |
| "learning_rate": 2.5454622329270354e-06, | |
| "loss": 0.301, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.095220243673852, | |
| "grad_norm": 0.2739516733416926, | |
| "learning_rate": 2.5264866652181572e-06, | |
| "loss": 0.2986, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.0989690721649485, | |
| "grad_norm": 0.26592574679314473, | |
| "learning_rate": 2.5075581504188162e-06, | |
| "loss": 0.297, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.102717900656045, | |
| "grad_norm": 0.2604176424953529, | |
| "learning_rate": 2.4886770486005283e-06, | |
| "loss": 0.3002, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.1064667291471415, | |
| "grad_norm": 0.2750968013788369, | |
| "learning_rate": 2.469843718932883e-06, | |
| "loss": 0.2955, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.110215557638238, | |
| "grad_norm": 0.2526155437469898, | |
| "learning_rate": 2.45105851967672e-06, | |
| "loss": 0.3008, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.1139643861293345, | |
| "grad_norm": 0.2410587895663488, | |
| "learning_rate": 2.432321808177304e-06, | |
| "loss": 0.3005, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.117713214620431, | |
| "grad_norm": 0.2895237677114124, | |
| "learning_rate": 2.413633940857535e-06, | |
| "loss": 0.2981, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.121462043111528, | |
| "grad_norm": 0.24128988947398597, | |
| "learning_rate": 2.394995273211159e-06, | |
| "loss": 0.2935, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.1252108716026243, | |
| "grad_norm": 0.2507583674367069, | |
| "learning_rate": 2.376406159796018e-06, | |
| "loss": 0.2964, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.128959700093721, | |
| "grad_norm": 0.2412689179502715, | |
| "learning_rate": 2.357866954227297e-06, | |
| "loss": 0.2981, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.1327085285848173, | |
| "grad_norm": 0.2522987442122012, | |
| "learning_rate": 2.3393780091707925e-06, | |
| "loss": 0.3003, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.1364573570759138, | |
| "grad_norm": 0.2933683784691293, | |
| "learning_rate": 2.32093967633622e-06, | |
| "loss": 0.2976, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.1402061855670103, | |
| "grad_norm": 0.24892001274036304, | |
| "learning_rate": 2.3025523064705054e-06, | |
| "loss": 0.2938, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.1439550140581067, | |
| "grad_norm": 0.2486503075797692, | |
| "learning_rate": 2.284216249351125e-06, | |
| "loss": 0.3002, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.147703842549203, | |
| "grad_norm": 0.2408285712986302, | |
| "learning_rate": 2.265931853779449e-06, | |
| "loss": 0.298, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.1514526710402997, | |
| "grad_norm": 0.2539542237070198, | |
| "learning_rate": 2.2476994675741032e-06, | |
| "loss": 0.2995, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.1552014995313966, | |
| "grad_norm": 0.25802690903176057, | |
| "learning_rate": 2.2295194375643574e-06, | |
| "loss": 0.2986, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.158950328022493, | |
| "grad_norm": 0.2434560170701692, | |
| "learning_rate": 2.21139210958352e-06, | |
| "loss": 0.2974, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.1626991565135896, | |
| "grad_norm": 0.25695023854646953, | |
| "learning_rate": 2.1933178284623696e-06, | |
| "loss": 0.2967, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.166447985004686, | |
| "grad_norm": 0.23288003046372271, | |
| "learning_rate": 2.175296938022586e-06, | |
| "loss": 0.3007, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.1701968134957825, | |
| "grad_norm": 0.23121575998389227, | |
| "learning_rate": 2.1573297810702178e-06, | |
| "loss": 0.2961, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.173945641986879, | |
| "grad_norm": 0.24000439116988645, | |
| "learning_rate": 2.139416699389153e-06, | |
| "loss": 0.2958, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.1776944704779755, | |
| "grad_norm": 0.2871636238090685, | |
| "learning_rate": 2.121558033734626e-06, | |
| "loss": 0.2992, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.181443298969072, | |
| "grad_norm": 0.26976350838721214, | |
| "learning_rate": 2.103754123826729e-06, | |
| "loss": 0.2952, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.185192127460169, | |
| "grad_norm": 0.2521119119225825, | |
| "learning_rate": 2.0860053083439523e-06, | |
| "loss": 0.2992, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.1889409559512654, | |
| "grad_norm": 0.23812422246398923, | |
| "learning_rate": 2.0683119249167444e-06, | |
| "loss": 0.2959, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.192689784442362, | |
| "grad_norm": 0.24908411598662353, | |
| "learning_rate": 2.0506743101210786e-06, | |
| "loss": 0.2992, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.1964386129334583, | |
| "grad_norm": 0.24276676952619408, | |
| "learning_rate": 2.033092799472065e-06, | |
| "loss": 0.2951, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.200187441424555, | |
| "grad_norm": 0.26493805047445046, | |
| "learning_rate": 2.0155677274175607e-06, | |
| "loss": 0.2977, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 2.2039362699156513, | |
| "grad_norm": 0.2557649875393899, | |
| "learning_rate": 1.9980994273318033e-06, | |
| "loss": 0.301, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.2076850984067478, | |
| "grad_norm": 0.25974462261893655, | |
| "learning_rate": 1.9806882315090796e-06, | |
| "loss": 0.2986, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 2.2114339268978442, | |
| "grad_norm": 0.2788427522001163, | |
| "learning_rate": 1.963334471157395e-06, | |
| "loss": 0.3006, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.215182755388941, | |
| "grad_norm": 0.2639877192794817, | |
| "learning_rate": 1.946038476392179e-06, | |
| "loss": 0.2939, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 2.2189315838800376, | |
| "grad_norm": 0.2552096590711124, | |
| "learning_rate": 1.9288005762300034e-06, | |
| "loss": 0.2977, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.222680412371134, | |
| "grad_norm": 0.2618656152750493, | |
| "learning_rate": 1.9116210985823234e-06, | |
| "loss": 0.2977, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 2.2264292408622306, | |
| "grad_norm": 0.23852460637945597, | |
| "learning_rate": 1.894500370249242e-06, | |
| "loss": 0.2989, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 2.230178069353327, | |
| "grad_norm": 0.24929690435833737, | |
| "learning_rate": 1.8774387169132858e-06, | |
| "loss": 0.2999, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.2339268978444236, | |
| "grad_norm": 0.23828564971308522, | |
| "learning_rate": 1.8604364631332216e-06, | |
| "loss": 0.297, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 2.23767572633552, | |
| "grad_norm": 0.24494166996235048, | |
| "learning_rate": 1.8434939323378715e-06, | |
| "loss": 0.2973, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 2.2414245548266165, | |
| "grad_norm": 0.24086882859135658, | |
| "learning_rate": 1.8266114468199692e-06, | |
| "loss": 0.295, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 2.245173383317713, | |
| "grad_norm": 0.2544357426841843, | |
| "learning_rate": 1.80978932773002e-06, | |
| "loss": 0.2973, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 2.24892221180881, | |
| "grad_norm": 0.23643151878409177, | |
| "learning_rate": 1.7930278950701997e-06, | |
| "loss": 0.3002, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.2526710402999064, | |
| "grad_norm": 0.23975129409481882, | |
| "learning_rate": 1.7763274676882647e-06, | |
| "loss": 0.2979, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 2.256419868791003, | |
| "grad_norm": 0.24291773824786236, | |
| "learning_rate": 1.7596883632714852e-06, | |
| "loss": 0.2932, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 2.2601686972820993, | |
| "grad_norm": 0.25153396655313853, | |
| "learning_rate": 1.7431108983406036e-06, | |
| "loss": 0.298, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 2.263917525773196, | |
| "grad_norm": 0.23197295943529667, | |
| "learning_rate": 1.7265953882438086e-06, | |
| "loss": 0.2988, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 2.2676663542642923, | |
| "grad_norm": 0.24970808760082508, | |
| "learning_rate": 1.7101421471507457e-06, | |
| "loss": 0.2939, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.271415182755389, | |
| "grad_norm": 0.228549562748791, | |
| "learning_rate": 1.6937514880465355e-06, | |
| "loss": 0.2989, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 2.2751640112464857, | |
| "grad_norm": 0.24315653299360235, | |
| "learning_rate": 1.6774237227258144e-06, | |
| "loss": 0.2973, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 2.278912839737582, | |
| "grad_norm": 0.24556558850550259, | |
| "learning_rate": 1.6611591617868162e-06, | |
| "loss": 0.2959, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 2.2826616682286787, | |
| "grad_norm": 0.2148170791770767, | |
| "learning_rate": 1.6449581146254496e-06, | |
| "loss": 0.2991, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 2.286410496719775, | |
| "grad_norm": 0.24023542138163662, | |
| "learning_rate": 1.628820889429426e-06, | |
| "loss": 0.2949, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.2901593252108716, | |
| "grad_norm": 0.23817289823295282, | |
| "learning_rate": 1.6127477931723857e-06, | |
| "loss": 0.2959, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 2.293908153701968, | |
| "grad_norm": 0.2431772216034719, | |
| "learning_rate": 1.596739131608065e-06, | |
| "loss": 0.2973, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 2.2976569821930646, | |
| "grad_norm": 0.2382144152110202, | |
| "learning_rate": 1.5807952092644795e-06, | |
| "loss": 0.2994, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 2.301405810684161, | |
| "grad_norm": 0.24054816884904162, | |
| "learning_rate": 1.564916329438128e-06, | |
| "loss": 0.2936, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 2.3051546391752575, | |
| "grad_norm": 0.26274953712674814, | |
| "learning_rate": 1.549102794188228e-06, | |
| "loss": 0.2973, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.3089034676663545, | |
| "grad_norm": 0.2806875425573502, | |
| "learning_rate": 1.5333549043309592e-06, | |
| "loss": 0.2977, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 2.312652296157451, | |
| "grad_norm": 0.24329932907358484, | |
| "learning_rate": 1.5176729594337575e-06, | |
| "loss": 0.2958, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 2.3164011246485474, | |
| "grad_norm": 0.24003607606344493, | |
| "learning_rate": 1.5020572578095999e-06, | |
| "loss": 0.2986, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 2.320149953139644, | |
| "grad_norm": 0.26644077871725813, | |
| "learning_rate": 1.4865080965113415e-06, | |
| "loss": 0.2998, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 2.3238987816307404, | |
| "grad_norm": 0.24781415510678054, | |
| "learning_rate": 1.4710257713260623e-06, | |
| "loss": 0.3022, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.327647610121837, | |
| "grad_norm": 0.24420941065044707, | |
| "learning_rate": 1.4556105767694317e-06, | |
| "loss": 0.2981, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 2.3313964386129333, | |
| "grad_norm": 0.26073417382172037, | |
| "learning_rate": 1.44026280608012e-06, | |
| "loss": 0.2984, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 2.33514526710403, | |
| "grad_norm": 0.25521419311104987, | |
| "learning_rate": 1.42498275121421e-06, | |
| "loss": 0.2954, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 2.3388940955951263, | |
| "grad_norm": 0.23848221017188856, | |
| "learning_rate": 1.4097707028396496e-06, | |
| "loss": 0.3005, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 2.342642924086223, | |
| "grad_norm": 0.26233800895757997, | |
| "learning_rate": 1.394626950330713e-06, | |
| "loss": 0.2989, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.3463917525773197, | |
| "grad_norm": 0.2548396023100127, | |
| "learning_rate": 1.3795517817625088e-06, | |
| "loss": 0.2978, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 2.350140581068416, | |
| "grad_norm": 0.22752855679283976, | |
| "learning_rate": 1.3645454839054921e-06, | |
| "loss": 0.2967, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 2.3538894095595126, | |
| "grad_norm": 0.2664643076296192, | |
| "learning_rate": 1.3496083422200085e-06, | |
| "loss": 0.2978, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 2.357638238050609, | |
| "grad_norm": 0.2543566847419294, | |
| "learning_rate": 1.3347406408508695e-06, | |
| "loss": 0.2953, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 2.3613870665417056, | |
| "grad_norm": 0.263835168702037, | |
| "learning_rate": 1.3199426626219407e-06, | |
| "loss": 0.2989, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.365135895032802, | |
| "grad_norm": 0.25548238955558084, | |
| "learning_rate": 1.3052146890307683e-06, | |
| "loss": 0.2981, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 2.368884723523899, | |
| "grad_norm": 0.23791649446797647, | |
| "learning_rate": 1.2905570002432188e-06, | |
| "loss": 0.2963, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 2.3726335520149955, | |
| "grad_norm": 0.25494794302918333, | |
| "learning_rate": 1.2759698750881533e-06, | |
| "loss": 0.2973, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 2.376382380506092, | |
| "grad_norm": 0.23772018287469734, | |
| "learning_rate": 1.261453591052123e-06, | |
| "loss": 0.2975, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 2.3801312089971884, | |
| "grad_norm": 0.22552210252604188, | |
| "learning_rate": 1.2470084242740848e-06, | |
| "loss": 0.2971, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.383880037488285, | |
| "grad_norm": 0.2497054243868162, | |
| "learning_rate": 1.2326346495401587e-06, | |
| "loss": 0.2989, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 2.3876288659793814, | |
| "grad_norm": 0.23428555088684583, | |
| "learning_rate": 1.2183325402783892e-06, | |
| "loss": 0.2974, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 2.391377694470478, | |
| "grad_norm": 0.24172427166914104, | |
| "learning_rate": 1.2041023685535557e-06, | |
| "loss": 0.2959, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 2.3951265229615744, | |
| "grad_norm": 0.24675551010483163, | |
| "learning_rate": 1.1899444050619891e-06, | |
| "loss": 0.2967, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 2.398875351452671, | |
| "grad_norm": 0.2375966890453732, | |
| "learning_rate": 1.1758589191264214e-06, | |
| "loss": 0.2994, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.4026241799437678, | |
| "grad_norm": 0.23458923294261602, | |
| "learning_rate": 1.1618461786908698e-06, | |
| "loss": 0.2989, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 2.4063730084348642, | |
| "grad_norm": 0.22080305330098507, | |
| "learning_rate": 1.1479064503155335e-06, | |
| "loss": 0.2957, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 2.4101218369259607, | |
| "grad_norm": 0.24659042545889448, | |
| "learning_rate": 1.1340399991717266e-06, | |
| "loss": 0.2928, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 2.413870665417057, | |
| "grad_norm": 0.2393208054211518, | |
| "learning_rate": 1.1202470890368283e-06, | |
| "loss": 0.2952, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 2.4176194939081537, | |
| "grad_norm": 0.2452372681591504, | |
| "learning_rate": 1.1065279822892732e-06, | |
| "loss": 0.2989, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.42136832239925, | |
| "grad_norm": 0.24729288324568552, | |
| "learning_rate": 1.0928829399035563e-06, | |
| "loss": 0.2954, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 2.4251171508903466, | |
| "grad_norm": 0.24671451339099365, | |
| "learning_rate": 1.0793122214452617e-06, | |
| "loss": 0.2957, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 2.428865979381443, | |
| "grad_norm": 0.24427659099939275, | |
| "learning_rate": 1.0658160850661408e-06, | |
| "loss": 0.3004, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 2.4326148078725396, | |
| "grad_norm": 0.21374771872871845, | |
| "learning_rate": 1.0523947874991842e-06, | |
| "loss": 0.2984, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 2.4363636363636365, | |
| "grad_norm": 0.22329887893290565, | |
| "learning_rate": 1.0390485840537506e-06, | |
| "loss": 0.297, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.440112464854733, | |
| "grad_norm": 0.22533705247175104, | |
| "learning_rate": 1.0257777286107045e-06, | |
| "loss": 0.2943, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 2.4438612933458295, | |
| "grad_norm": 0.2423245266897566, | |
| "learning_rate": 1.0125824736175877e-06, | |
| "loss": 0.3007, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 2.447610121836926, | |
| "grad_norm": 0.22780962278138578, | |
| "learning_rate": 9.994630700838175e-07, | |
| "loss": 0.2967, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 2.4513589503280224, | |
| "grad_norm": 0.22430754799850255, | |
| "learning_rate": 9.864197675759096e-07, | |
| "loss": 0.2966, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 2.455107778819119, | |
| "grad_norm": 0.22784149833015035, | |
| "learning_rate": 9.734528142127353e-07, | |
| "loss": 0.2983, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.4588566073102154, | |
| "grad_norm": 0.22292639589506671, | |
| "learning_rate": 9.605624566607951e-07, | |
| "loss": 0.2951, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 2.4626054358013123, | |
| "grad_norm": 0.2236123311897104, | |
| "learning_rate": 9.477489401295331e-07, | |
| "loss": 0.2965, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 2.466354264292409, | |
| "grad_norm": 0.22000628796913552, | |
| "learning_rate": 9.350125083666711e-07, | |
| "loss": 0.2976, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 2.4701030927835053, | |
| "grad_norm": 0.24493160622717997, | |
| "learning_rate": 9.223534036535636e-07, | |
| "loss": 0.297, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 2.4738519212746017, | |
| "grad_norm": 0.2455063807901648, | |
| "learning_rate": 9.09771866800604e-07, | |
| "loss": 0.2995, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.477600749765698, | |
| "grad_norm": 0.24548065358066581, | |
| "learning_rate": 8.972681371426273e-07, | |
| "loss": 0.2953, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 2.4813495782567947, | |
| "grad_norm": 0.2521783316460835, | |
| "learning_rate": 8.848424525343713e-07, | |
| "loss": 0.2946, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 2.485098406747891, | |
| "grad_norm": 0.2553417419798155, | |
| "learning_rate": 8.724950493459439e-07, | |
| "loss": 0.2963, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 2.4888472352389877, | |
| "grad_norm": 0.24486438815130893, | |
| "learning_rate": 8.60226162458328e-07, | |
| "loss": 0.2964, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 2.492596063730084, | |
| "grad_norm": 0.22843671738308705, | |
| "learning_rate": 8.48036025258917e-07, | |
| "loss": 0.2972, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.496344892221181, | |
| "grad_norm": 0.22729569350727913, | |
| "learning_rate": 8.359248696370676e-07, | |
| "loss": 0.298, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 2.5000937207122775, | |
| "grad_norm": 0.21994248905891314, | |
| "learning_rate": 8.238929259796991e-07, | |
| "loss": 0.2988, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 2.503842549203374, | |
| "grad_norm": 0.21862264017292513, | |
| "learning_rate": 8.119404231668987e-07, | |
| "loss": 0.2961, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 2.5075913776944705, | |
| "grad_norm": 0.22932451090242004, | |
| "learning_rate": 8.000675885675812e-07, | |
| "loss": 0.2957, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 2.511340206185567, | |
| "grad_norm": 0.2346294431862581, | |
| "learning_rate": 7.882746480351499e-07, | |
| "loss": 0.2989, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.5150890346766634, | |
| "grad_norm": 0.2507513804674915, | |
| "learning_rate": 7.765618259032115e-07, | |
| "loss": 0.2967, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 2.51883786316776, | |
| "grad_norm": 0.2301520077398978, | |
| "learning_rate": 7.649293449813022e-07, | |
| "loss": 0.2989, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 2.522586691658857, | |
| "grad_norm": 0.22017138623299468, | |
| "learning_rate": 7.533774265506528e-07, | |
| "loss": 0.3008, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 2.526335520149953, | |
| "grad_norm": 0.23466327607015292, | |
| "learning_rate": 7.419062903599766e-07, | |
| "loss": 0.2973, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 2.53008434864105, | |
| "grad_norm": 0.23339557992179352, | |
| "learning_rate": 7.305161546212891e-07, | |
| "loss": 0.2998, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 2.5338331771321463, | |
| "grad_norm": 0.228073145422222, | |
| "learning_rate": 7.192072360057601e-07, | |
| "loss": 0.2976, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 2.5375820056232428, | |
| "grad_norm": 0.22403329120047444, | |
| "learning_rate": 7.079797496395913e-07, | |
| "loss": 0.2991, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 2.5413308341143392, | |
| "grad_norm": 0.21653330730415413, | |
| "learning_rate": 6.968339090999188e-07, | |
| "loss": 0.301, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 2.5450796626054357, | |
| "grad_norm": 0.22798842242544992, | |
| "learning_rate": 6.857699264107592e-07, | |
| "loss": 0.2961, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 2.548828491096532, | |
| "grad_norm": 0.22152088362625078, | |
| "learning_rate": 6.747880120389671e-07, | |
| "loss": 0.2969, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.5525773195876287, | |
| "grad_norm": 0.24941395004953096, | |
| "learning_rate": 6.638883748902386e-07, | |
| "loss": 0.2942, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 2.5563261480787256, | |
| "grad_norm": 0.2352854991109166, | |
| "learning_rate": 6.530712223051345e-07, | |
| "loss": 0.2993, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 2.560074976569822, | |
| "grad_norm": 0.21713333345854474, | |
| "learning_rate": 6.423367600551356e-07, | |
| "loss": 0.2965, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 2.5638238050609186, | |
| "grad_norm": 0.21808108904670195, | |
| "learning_rate": 6.316851923387302e-07, | |
| "loss": 0.295, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 2.567572633552015, | |
| "grad_norm": 0.22265553162824864, | |
| "learning_rate": 6.211167217775255e-07, | |
| "loss": 0.2999, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 2.5713214620431115, | |
| "grad_norm": 0.2122649455456037, | |
| "learning_rate": 6.106315494123999e-07, | |
| "loss": 0.2972, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 2.575070290534208, | |
| "grad_norm": 0.24269035288607615, | |
| "learning_rate": 6.00229874699671e-07, | |
| "loss": 0.2977, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 2.5788191190253045, | |
| "grad_norm": 0.2197453016555571, | |
| "learning_rate": 5.899118955073108e-07, | |
| "loss": 0.2946, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 2.5825679475164014, | |
| "grad_norm": 0.2161395378073621, | |
| "learning_rate": 5.796778081111693e-07, | |
| "loss": 0.2977, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 2.5863167760074974, | |
| "grad_norm": 0.21287035374436375, | |
| "learning_rate": 5.695278071912541e-07, | |
| "loss": 0.296, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.5900656044985944, | |
| "grad_norm": 0.2318471089563035, | |
| "learning_rate": 5.59462085828017e-07, | |
| "loss": 0.297, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 2.593814432989691, | |
| "grad_norm": 0.22185763220188276, | |
| "learning_rate": 5.494808354986869e-07, | |
| "loss": 0.2966, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 2.5975632614807873, | |
| "grad_norm": 0.2258427132260489, | |
| "learning_rate": 5.395842460736251e-07, | |
| "loss": 0.2977, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 2.601312089971884, | |
| "grad_norm": 0.2214557018015068, | |
| "learning_rate": 5.297725058127101e-07, | |
| "loss": 0.2984, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 2.6050609184629803, | |
| "grad_norm": 0.22757821840990952, | |
| "learning_rate": 5.200458013617648e-07, | |
| "loss": 0.2967, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 2.6088097469540767, | |
| "grad_norm": 0.21756167779737182, | |
| "learning_rate": 5.104043177490003e-07, | |
| "loss": 0.2962, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 2.6125585754451732, | |
| "grad_norm": 0.22089636199337112, | |
| "learning_rate": 5.008482383814934e-07, | |
| "loss": 0.2954, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 2.61630740393627, | |
| "grad_norm": 0.23596951276418235, | |
| "learning_rate": 4.913777450417051e-07, | |
| "loss": 0.2958, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 2.620056232427366, | |
| "grad_norm": 0.23285613087396778, | |
| "learning_rate": 4.819930178840171e-07, | |
| "loss": 0.3001, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 2.623805060918463, | |
| "grad_norm": 0.22413547230254402, | |
| "learning_rate": 4.726942354313063e-07, | |
| "loss": 0.2977, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.6275538894095596, | |
| "grad_norm": 0.21404799569429234, | |
| "learning_rate": 4.634815745715504e-07, | |
| "loss": 0.2977, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 2.631302717900656, | |
| "grad_norm": 0.2220467447552054, | |
| "learning_rate": 4.5435521055446077e-07, | |
| "loss": 0.3002, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 2.6350515463917525, | |
| "grad_norm": 0.22203820025276308, | |
| "learning_rate": 4.4531531698815025e-07, | |
| "loss": 0.2985, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 2.638800374882849, | |
| "grad_norm": 0.21412842926369127, | |
| "learning_rate": 4.3636206583582755e-07, | |
| "loss": 0.2941, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 2.6425492033739455, | |
| "grad_norm": 0.21845241517697617, | |
| "learning_rate": 4.2749562741253194e-07, | |
| "loss": 0.2983, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 2.646298031865042, | |
| "grad_norm": 0.23326009884971552, | |
| "learning_rate": 4.1871617038188704e-07, | |
| "loss": 0.2967, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 2.650046860356139, | |
| "grad_norm": 0.22789428750694415, | |
| "learning_rate": 4.100238617528973e-07, | |
| "loss": 0.2965, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 2.6537956888472354, | |
| "grad_norm": 0.2310757479907784, | |
| "learning_rate": 4.014188668767671e-07, | |
| "loss": 0.2956, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 2.657544517338332, | |
| "grad_norm": 0.21314866127121557, | |
| "learning_rate": 3.9290134944375834e-07, | |
| "loss": 0.2975, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 2.6612933458294283, | |
| "grad_norm": 0.21262546928012396, | |
| "learning_rate": 3.8447147148007735e-07, | |
| "loss": 0.2991, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.665042174320525, | |
| "grad_norm": 0.2185036816100282, | |
| "learning_rate": 3.761293933447868e-07, | |
| "loss": 0.2959, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 2.6687910028116213, | |
| "grad_norm": 0.22509882595410716, | |
| "learning_rate": 3.678752737267627e-07, | |
| "loss": 0.2961, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 2.6725398313027178, | |
| "grad_norm": 0.2209295427900408, | |
| "learning_rate": 3.597092696416704e-07, | |
| "loss": 0.296, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 2.6762886597938147, | |
| "grad_norm": 0.20839767657345984, | |
| "learning_rate": 3.5163153642898073e-07, | |
| "loss": 0.2986, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 2.6800374882849107, | |
| "grad_norm": 0.2170005019845449, | |
| "learning_rate": 3.4364222774901366e-07, | |
| "loss": 0.2969, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 2.6837863167760077, | |
| "grad_norm": 0.22790793305608095, | |
| "learning_rate": 3.357414955800148e-07, | |
| "loss": 0.2967, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 2.687535145267104, | |
| "grad_norm": 0.2179962319863983, | |
| "learning_rate": 3.2792949021526686e-07, | |
| "loss": 0.2965, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 2.6912839737582006, | |
| "grad_norm": 0.2176296940618202, | |
| "learning_rate": 3.202063602602262e-07, | |
| "loss": 0.2982, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 2.695032802249297, | |
| "grad_norm": 0.22847934720081933, | |
| "learning_rate": 3.1257225262970146e-07, | |
| "loss": 0.2946, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 2.6987816307403936, | |
| "grad_norm": 0.20161821165065855, | |
| "learning_rate": 3.050273125450537e-07, | |
| "loss": 0.2958, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.70253045923149, | |
| "grad_norm": 0.21754547873420735, | |
| "learning_rate": 2.9757168353143795e-07, | |
| "loss": 0.2931, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 2.7062792877225865, | |
| "grad_norm": 0.20927718552782254, | |
| "learning_rate": 2.9020550741507003e-07, | |
| "loss": 0.296, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 2.7100281162136834, | |
| "grad_norm": 0.21673136214793481, | |
| "learning_rate": 2.829289243205313e-07, | |
| "loss": 0.2977, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 2.7137769447047795, | |
| "grad_norm": 0.21738597303662568, | |
| "learning_rate": 2.7574207266810095e-07, | |
| "loss": 0.297, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 2.7175257731958764, | |
| "grad_norm": 0.22090904296834485, | |
| "learning_rate": 2.686450891711223e-07, | |
| "loss": 0.3019, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 2.721274601686973, | |
| "grad_norm": 0.22588390372378073, | |
| "learning_rate": 2.6163810883340633e-07, | |
| "loss": 0.2992, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 2.7250234301780694, | |
| "grad_norm": 0.2171446833278576, | |
| "learning_rate": 2.547212649466568e-07, | |
| "loss": 0.2958, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 2.728772258669166, | |
| "grad_norm": 0.20897228290385925, | |
| "learning_rate": 2.478946890879419e-07, | |
| "loss": 0.2987, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 2.7325210871602623, | |
| "grad_norm": 0.21930393993687972, | |
| "learning_rate": 2.4115851111718767e-07, | |
| "loss": 0.2984, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 2.736269915651359, | |
| "grad_norm": 0.2086577507401556, | |
| "learning_rate": 2.3451285917470478e-07, | |
| "loss": 0.2948, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.7400187441424553, | |
| "grad_norm": 0.24689056017428762, | |
| "learning_rate": 2.2795785967875794e-07, | |
| "loss": 0.2949, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 2.743767572633552, | |
| "grad_norm": 0.2202426775150526, | |
| "learning_rate": 2.214936373231552e-07, | |
| "loss": 0.2964, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 2.7475164011246487, | |
| "grad_norm": 0.22114657231547902, | |
| "learning_rate": 2.151203150748793e-07, | |
| "loss": 0.295, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 2.751265229615745, | |
| "grad_norm": 0.20989352158816843, | |
| "learning_rate": 2.08838014171745e-07, | |
| "loss": 0.2948, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 2.7550140581068416, | |
| "grad_norm": 0.21653508921554077, | |
| "learning_rate": 2.0264685412009765e-07, | |
| "loss": 0.2913, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 2.758762886597938, | |
| "grad_norm": 0.20856496439474423, | |
| "learning_rate": 1.9654695269253676e-07, | |
| "loss": 0.2966, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 2.7625117150890346, | |
| "grad_norm": 0.21697328504317934, | |
| "learning_rate": 1.9053842592567372e-07, | |
| "loss": 0.2935, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 2.766260543580131, | |
| "grad_norm": 0.223018905717748, | |
| "learning_rate": 1.846213881179304e-07, | |
| "loss": 0.2943, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 2.770009372071228, | |
| "grad_norm": 0.22471887950418276, | |
| "learning_rate": 1.7879595182735853e-07, | |
| "loss": 0.294, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 2.773758200562324, | |
| "grad_norm": 0.2274675818451194, | |
| "learning_rate": 1.7306222786950266e-07, | |
| "loss": 0.2976, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.777507029053421, | |
| "grad_norm": 0.21745114552538827, | |
| "learning_rate": 1.6742032531529117e-07, | |
| "loss": 0.2984, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 2.7812558575445174, | |
| "grad_norm": 0.209520126032932, | |
| "learning_rate": 1.618703514889608e-07, | |
| "loss": 0.294, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 2.785004686035614, | |
| "grad_norm": 0.2152117690983746, | |
| "learning_rate": 1.5641241196601542e-07, | |
| "loss": 0.2947, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 2.7887535145267104, | |
| "grad_norm": 0.20856820453640396, | |
| "learning_rate": 1.5104661057121605e-07, | |
| "loss": 0.2934, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 2.792502343017807, | |
| "grad_norm": 0.2087726976922736, | |
| "learning_rate": 1.457730493766113e-07, | |
| "loss": 0.2997, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 2.7962511715089033, | |
| "grad_norm": 0.21039795736062336, | |
| "learning_rate": 1.4059182869958776e-07, | |
| "loss": 0.2944, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.24255936306012296, | |
| "learning_rate": 1.355030471009683e-07, | |
| "loss": 0.2952, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 2.8037488284910967, | |
| "grad_norm": 0.228361544622829, | |
| "learning_rate": 1.3050680138313398e-07, | |
| "loss": 0.2968, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 2.8074976569821932, | |
| "grad_norm": 0.2788669586751136, | |
| "learning_rate": 1.2560318658818238e-07, | |
| "loss": 0.2919, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 2.8112464854732897, | |
| "grad_norm": 0.21485061261933228, | |
| "learning_rate": 1.2079229599612274e-07, | |
| "loss": 0.2948, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.814995313964386, | |
| "grad_norm": 0.20880096710948615, | |
| "learning_rate": 1.160742211230964e-07, | |
| "loss": 0.2966, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 2.8187441424554827, | |
| "grad_norm": 0.20194538376919882, | |
| "learning_rate": 1.1144905171964149e-07, | |
| "loss": 0.2975, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 2.822492970946579, | |
| "grad_norm": 0.20774178992713427, | |
| "learning_rate": 1.0691687576898202e-07, | |
| "loss": 0.2982, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 2.8262417994376756, | |
| "grad_norm": 0.208737099957496, | |
| "learning_rate": 1.0247777948535432e-07, | |
| "loss": 0.2925, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 2.829990627928772, | |
| "grad_norm": 0.2176776156807812, | |
| "learning_rate": 9.813184731236935e-08, | |
| "loss": 0.2932, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 2.8337394564198686, | |
| "grad_norm": 0.207539461770748, | |
| "learning_rate": 9.3879161921403e-08, | |
| "loss": 0.295, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 2.8374882849109655, | |
| "grad_norm": 0.2118079972476284, | |
| "learning_rate": 8.971980421002779e-08, | |
| "loss": 0.2946, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 2.841237113402062, | |
| "grad_norm": 0.20851272394405806, | |
| "learning_rate": 8.565385330046915e-08, | |
| "loss": 0.2997, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 2.8449859418931585, | |
| "grad_norm": 0.21224194574719543, | |
| "learning_rate": 8.168138653810387e-08, | |
| "loss": 0.2966, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 2.848734770384255, | |
| "grad_norm": 0.20967024625449956, | |
| "learning_rate": 7.780247948998788e-08, | |
| "loss": 0.2993, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.8524835988753514, | |
| "grad_norm": 0.2261170709858784, | |
| "learning_rate": 7.401720594341688e-08, | |
| "loss": 0.2947, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 2.856232427366448, | |
| "grad_norm": 0.2001298846468726, | |
| "learning_rate": 7.032563790452585e-08, | |
| "loss": 0.2976, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 2.8599812558575444, | |
| "grad_norm": 0.22681383953814865, | |
| "learning_rate": 6.672784559691725e-08, | |
| "loss": 0.2983, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 2.8637300843486413, | |
| "grad_norm": 0.21103315361356145, | |
| "learning_rate": 6.322389746032608e-08, | |
| "loss": 0.2956, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 2.8674789128397373, | |
| "grad_norm": 0.202377754899562, | |
| "learning_rate": 5.981386014931645e-08, | |
| "loss": 0.297, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 2.8712277413308342, | |
| "grad_norm": 0.21978839641973172, | |
| "learning_rate": 5.649779853201587e-08, | |
| "loss": 0.2957, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 2.8749765698219307, | |
| "grad_norm": 0.20879696172885226, | |
| "learning_rate": 5.3275775688879096e-08, | |
| "loss": 0.2951, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 2.878725398313027, | |
| "grad_norm": 0.20388100110101098, | |
| "learning_rate": 5.0147852911489606e-08, | |
| "loss": 0.2975, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 2.8824742268041237, | |
| "grad_norm": 0.21370045275380867, | |
| "learning_rate": 4.7114089701393864e-08, | |
| "loss": 0.2948, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 2.88622305529522, | |
| "grad_norm": 0.2060746309945781, | |
| "learning_rate": 4.4174543768968346e-08, | |
| "loss": 0.2945, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 2.8899718837863166, | |
| "grad_norm": 0.20704697522314686, | |
| "learning_rate": 4.132927103232209e-08, | |
| "loss": 0.2966, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 2.893720712277413, | |
| "grad_norm": 0.22365022572567808, | |
| "learning_rate": 3.857832561623309e-08, | |
| "loss": 0.2972, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 2.89746954076851, | |
| "grad_norm": 0.20659583365699133, | |
| "learning_rate": 3.592175985111968e-08, | |
| "loss": 0.2961, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 2.9012183692596065, | |
| "grad_norm": 0.20618218716255435, | |
| "learning_rate": 3.3359624272042976e-08, | |
| "loss": 0.2952, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 2.904967197750703, | |
| "grad_norm": 0.2149240713982645, | |
| "learning_rate": 3.089196761774715e-08, | |
| "loss": 0.2955, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 2.9087160262417995, | |
| "grad_norm": 0.22355201181105996, | |
| "learning_rate": 2.8518836829732332e-08, | |
| "loss": 0.2977, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 2.912464854732896, | |
| "grad_norm": 0.22264076611026476, | |
| "learning_rate": 2.6240277051359788e-08, | |
| "loss": 0.2987, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 2.9162136832239924, | |
| "grad_norm": 0.2033040105643027, | |
| "learning_rate": 2.4056331626995943e-08, | |
| "loss": 0.2967, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 2.919962511715089, | |
| "grad_norm": 0.2029744325807162, | |
| "learning_rate": 2.1967042101185832e-08, | |
| "loss": 0.2984, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 2.923711340206186, | |
| "grad_norm": 0.2207170499349933, | |
| "learning_rate": 1.9972448217863706e-08, | |
| "loss": 0.2944, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.927460168697282, | |
| "grad_norm": 0.2190536979775703, | |
| "learning_rate": 1.807258791959643e-08, | |
| "loss": 0.299, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 2.931208997188379, | |
| "grad_norm": 0.20453932822962492, | |
| "learning_rate": 1.626749734686295e-08, | |
| "loss": 0.2965, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 2.9349578256794753, | |
| "grad_norm": 0.21158692274365276, | |
| "learning_rate": 1.4557210837364278e-08, | |
| "loss": 0.2971, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 2.9387066541705718, | |
| "grad_norm": 0.22314299534375273, | |
| "learning_rate": 1.2941760925372914e-08, | |
| "loss": 0.296, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 2.9424554826616682, | |
| "grad_norm": 0.21201880468903128, | |
| "learning_rate": 1.1421178341112781e-08, | |
| "loss": 0.2981, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 2.9462043111527647, | |
| "grad_norm": 0.19995417097694335, | |
| "learning_rate": 9.995492010175245e-09, | |
| "loss": 0.2969, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 2.949953139643861, | |
| "grad_norm": 0.2032618514771412, | |
| "learning_rate": 8.66472905296678e-09, | |
| "loss": 0.2958, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 2.9537019681349577, | |
| "grad_norm": 0.22054734778678423, | |
| "learning_rate": 7.428914784197161e-09, | |
| "loss": 0.2984, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 2.9574507966260546, | |
| "grad_norm": 0.21531333241349596, | |
| "learning_rate": 6.288072712393734e-09, | |
| "loss": 0.2961, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 2.9611996251171506, | |
| "grad_norm": 0.21334038501182684, | |
| "learning_rate": 5.24222453945622e-09, | |
| "loss": 0.2968, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 2.9649484536082475, | |
| "grad_norm": 0.21379304132484478, | |
| "learning_rate": 4.291390160243713e-09, | |
| "loss": 0.2987, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 2.968697282099344, | |
| "grad_norm": 0.21586668554028848, | |
| "learning_rate": 3.435587662196094e-09, | |
| "loss": 0.2963, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 2.9724461105904405, | |
| "grad_norm": 0.21612407035729198, | |
| "learning_rate": 2.6748333249909665e-09, | |
| "loss": 0.2947, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 2.976194939081537, | |
| "grad_norm": 0.20332605208597299, | |
| "learning_rate": 2.0091416202316918e-09, | |
| "loss": 0.2988, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 2.9799437675726335, | |
| "grad_norm": 0.2079341599363278, | |
| "learning_rate": 1.4385252111737136e-09, | |
| "loss": 0.2919, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 2.98369259606373, | |
| "grad_norm": 0.20606235066584186, | |
| "learning_rate": 9.629949524830873e-10, | |
| "loss": 0.2952, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 2.9874414245548264, | |
| "grad_norm": 0.2280988323439713, | |
| "learning_rate": 5.825598900316421e-10, | |
| "loss": 0.2963, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 2.9911902530459233, | |
| "grad_norm": 0.21299797878972357, | |
| "learning_rate": 2.972272607221216e-10, | |
| "loss": 0.2919, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 2.99493908153702, | |
| "grad_norm": 0.21214297989590156, | |
| "learning_rate": 1.0700249235218175e-10, | |
| "loss": 0.2962, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 2.9986879100281163, | |
| "grad_norm": 0.21387066401777824, | |
| "learning_rate": 1.1889203511139536e-11, | |
| "loss": 0.299, | |
| "step": 8000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 8004, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 10000000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.990281269536358e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
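
The object above follows the `trainer_state.json` layout written by the Hugging Face `Trainer` (the `log_history`, `stateful_callbacks`, and `total_flos` fields match that convention). Below is a minimal sketch of how such a file could be loaded and its `log_history` turned into loss and learning-rate curves; the local file name `trainer_state.json` and the use of `matplotlib` are assumptions for illustration, not part of the original log.

```python
# Sketch: read a Trainer state file like the one above and plot the
# logged training loss and learning-rate schedule against the step count.
# Assumes the JSON has been saved locally as "trainer_state.json".
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Each logged entry carries "step", "loss", "learning_rate", and "grad_norm";
# keep only entries that actually contain a loss value.
history = [entry for entry in state["log_history"] if "loss" in entry]
steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]
lrs = [entry["learning_rate"] for entry in history]

fig, (ax_loss, ax_lr) = plt.subplots(1, 2, figsize=(10, 4))
ax_loss.plot(steps, losses)
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_xlabel("step")
ax_lr.set_ylabel("learning rate")
fig.tight_layout()
plt.show()
```

Plotted this way, the tail of the run shown here reads as expected for a cosine-style schedule: the learning rate decays from roughly 1.5e-06 at step 6180 toward ~1e-11 at step 8000 while the loss stays flat around 0.29-0.30.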