| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 3765, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007971303308090873, | |
| "grad_norm": 2.7439849590205014, | |
| "learning_rate": 2.387267904509284e-07, | |
| "loss": 0.4952, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.015942606616181746, | |
| "grad_norm": 1.8269416958822857, | |
| "learning_rate": 5.039787798408489e-07, | |
| "loss": 0.4898, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.023913909924272617, | |
| "grad_norm": 1.373955440239158, | |
| "learning_rate": 7.692307692307694e-07, | |
| "loss": 0.4661, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03188521323236349, | |
| "grad_norm": 0.7072675856563305, | |
| "learning_rate": 1.0344827586206898e-06, | |
| "loss": 0.4337, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03985651654045436, | |
| "grad_norm": 0.7236913112540133, | |
| "learning_rate": 1.29973474801061e-06, | |
| "loss": 0.4091, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.047827819848545235, | |
| "grad_norm": 0.42830095771838445, | |
| "learning_rate": 1.5649867374005307e-06, | |
| "loss": 0.391, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.05579912315663611, | |
| "grad_norm": 0.24985255941378587, | |
| "learning_rate": 1.830238726790451e-06, | |
| "loss": 0.3755, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.06377042646472698, | |
| "grad_norm": 0.22053904249111797, | |
| "learning_rate": 2.0954907161803713e-06, | |
| "loss": 0.3646, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.07174172977281786, | |
| "grad_norm": 0.1871454926752817, | |
| "learning_rate": 2.360742705570292e-06, | |
| "loss": 0.3585, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07971303308090873, | |
| "grad_norm": 0.16957059006933034, | |
| "learning_rate": 2.625994694960212e-06, | |
| "loss": 0.3537, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0876843363889996, | |
| "grad_norm": 0.15807877454450825, | |
| "learning_rate": 2.891246684350133e-06, | |
| "loss": 0.3481, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.09565563969709047, | |
| "grad_norm": 0.1696204667980688, | |
| "learning_rate": 3.1564986737400535e-06, | |
| "loss": 0.3455, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.10362694300518134, | |
| "grad_norm": 0.18296589316110073, | |
| "learning_rate": 3.4217506631299737e-06, | |
| "loss": 0.3374, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.11159824631327223, | |
| "grad_norm": 0.1589874660304547, | |
| "learning_rate": 3.6870026525198943e-06, | |
| "loss": 0.3353, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.1195695496213631, | |
| "grad_norm": 0.16713671478912018, | |
| "learning_rate": 3.9522546419098145e-06, | |
| "loss": 0.3311, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12754085292945397, | |
| "grad_norm": 0.19094654720327028, | |
| "learning_rate": 4.217506631299735e-06, | |
| "loss": 0.3298, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.13551215623754484, | |
| "grad_norm": 0.17173127985031086, | |
| "learning_rate": 4.482758620689656e-06, | |
| "loss": 0.3297, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1434834595456357, | |
| "grad_norm": 0.16743742436159792, | |
| "learning_rate": 4.748010610079576e-06, | |
| "loss": 0.3255, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.15145476285372658, | |
| "grad_norm": 0.20163674324120764, | |
| "learning_rate": 5.013262599469496e-06, | |
| "loss": 0.325, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.15942606616181745, | |
| "grad_norm": 0.16037098448631043, | |
| "learning_rate": 5.278514588859417e-06, | |
| "loss": 0.3197, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.16739736946990832, | |
| "grad_norm": 0.21353712697354443, | |
| "learning_rate": 5.5437665782493376e-06, | |
| "loss": 0.319, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1753686727779992, | |
| "grad_norm": 0.19869628289479196, | |
| "learning_rate": 5.809018567639257e-06, | |
| "loss": 0.3156, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.18333997608609007, | |
| "grad_norm": 0.1975130616223235, | |
| "learning_rate": 6.074270557029178e-06, | |
| "loss": 0.3172, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.19131127939418094, | |
| "grad_norm": 0.1902414828411451, | |
| "learning_rate": 6.339522546419099e-06, | |
| "loss": 0.3122, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1992825827022718, | |
| "grad_norm": 0.22225170253654492, | |
| "learning_rate": 6.6047745358090184e-06, | |
| "loss": 0.3073, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.20725388601036268, | |
| "grad_norm": 0.19343451278128465, | |
| "learning_rate": 6.87002652519894e-06, | |
| "loss": 0.309, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.21522518931845358, | |
| "grad_norm": 0.21183101802781412, | |
| "learning_rate": 7.1352785145888606e-06, | |
| "loss": 0.3124, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.22319649262654445, | |
| "grad_norm": 0.26603427067291613, | |
| "learning_rate": 7.40053050397878e-06, | |
| "loss": 0.3059, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.23116779593463532, | |
| "grad_norm": 0.22261379017016011, | |
| "learning_rate": 7.6657824933687e-06, | |
| "loss": 0.3094, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.2391390992427262, | |
| "grad_norm": 0.2024824115097555, | |
| "learning_rate": 7.93103448275862e-06, | |
| "loss": 0.3084, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.24711040255081707, | |
| "grad_norm": 0.2042942185636207, | |
| "learning_rate": 8.196286472148541e-06, | |
| "loss": 0.3057, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.25508170585890794, | |
| "grad_norm": 0.18736233349181933, | |
| "learning_rate": 8.461538461538462e-06, | |
| "loss": 0.304, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2630530091669988, | |
| "grad_norm": 0.21102640951635082, | |
| "learning_rate": 8.726790450928383e-06, | |
| "loss": 0.3033, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.2710243124750897, | |
| "grad_norm": 0.21618009137316338, | |
| "learning_rate": 8.992042440318303e-06, | |
| "loss": 0.3021, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.27899561578318055, | |
| "grad_norm": 0.19515018580578244, | |
| "learning_rate": 9.257294429708224e-06, | |
| "loss": 0.2978, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2869669190912714, | |
| "grad_norm": 0.24140786468138617, | |
| "learning_rate": 9.522546419098145e-06, | |
| "loss": 0.3012, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2949382223993623, | |
| "grad_norm": 0.27497175793322665, | |
| "learning_rate": 9.787798408488064e-06, | |
| "loss": 0.3013, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.30290952570745316, | |
| "grad_norm": 0.23551643286267343, | |
| "learning_rate": 9.99999140169557e-06, | |
| "loss": 0.3002, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.31088082901554404, | |
| "grad_norm": 0.24946640275348875, | |
| "learning_rate": 9.99969046414561e-06, | |
| "loss": 0.2955, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3188521323236349, | |
| "grad_norm": 0.2424205054036822, | |
| "learning_rate": 9.998959640946033e-06, | |
| "loss": 0.2976, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3268234356317258, | |
| "grad_norm": 0.2844168535727217, | |
| "learning_rate": 9.997798994934812e-06, | |
| "loss": 0.2977, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.33479473893981665, | |
| "grad_norm": 0.25483658348181026, | |
| "learning_rate": 9.99620862590714e-06, | |
| "loss": 0.2951, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3427660422479075, | |
| "grad_norm": 0.23711613447438198, | |
| "learning_rate": 9.994188670606845e-06, | |
| "loss": 0.2952, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3507373455559984, | |
| "grad_norm": 0.2028196656708933, | |
| "learning_rate": 9.99173930271464e-06, | |
| "loss": 0.2972, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.35870864886408926, | |
| "grad_norm": 0.23753065276887678, | |
| "learning_rate": 9.988860732833183e-06, | |
| "loss": 0.294, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.36667995217218013, | |
| "grad_norm": 0.21519789088821045, | |
| "learning_rate": 9.98555320846897e-06, | |
| "loss": 0.2949, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.374651255480271, | |
| "grad_norm": 0.24780874257317498, | |
| "learning_rate": 9.981817014011066e-06, | |
| "loss": 0.2911, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.3826225587883619, | |
| "grad_norm": 0.2419346606109402, | |
| "learning_rate": 9.977652470706629e-06, | |
| "loss": 0.2923, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.39059386209645275, | |
| "grad_norm": 0.22080537805937434, | |
| "learning_rate": 9.973059936633308e-06, | |
| "loss": 0.2908, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3985651654045436, | |
| "grad_norm": 0.22915945812817434, | |
| "learning_rate": 9.968039806668448e-06, | |
| "loss": 0.2934, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4065364687126345, | |
| "grad_norm": 0.25832882794920076, | |
| "learning_rate": 9.96259251245514e-06, | |
| "loss": 0.2892, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.41450777202072536, | |
| "grad_norm": 0.20890334095429705, | |
| "learning_rate": 9.956718522365098e-06, | |
| "loss": 0.29, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.42247907532881623, | |
| "grad_norm": 0.19222031712823726, | |
| "learning_rate": 9.950418341458398e-06, | |
| "loss": 0.2936, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.43045037863690716, | |
| "grad_norm": 0.229251945332095, | |
| "learning_rate": 9.943692511440051e-06, | |
| "loss": 0.2903, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.43842168194499803, | |
| "grad_norm": 0.19910329695969214, | |
| "learning_rate": 9.936541610613417e-06, | |
| "loss": 0.2882, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.4463929852530889, | |
| "grad_norm": 0.1980158753040197, | |
| "learning_rate": 9.928966253830492e-06, | |
| "loss": 0.288, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.4543642885611798, | |
| "grad_norm": 0.21294837161238345, | |
| "learning_rate": 9.920967092439028e-06, | |
| "loss": 0.2901, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.46233559186927065, | |
| "grad_norm": 0.2345212892254059, | |
| "learning_rate": 9.912544814226547e-06, | |
| "loss": 0.2889, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.4703068951773615, | |
| "grad_norm": 0.2653674551152453, | |
| "learning_rate": 9.903700143361185e-06, | |
| "loss": 0.2884, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.4782781984854524, | |
| "grad_norm": 0.22315589547019074, | |
| "learning_rate": 9.894433840329442e-06, | |
| "loss": 0.288, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.48624950179354326, | |
| "grad_norm": 0.27243118100516245, | |
| "learning_rate": 9.884746701870778e-06, | |
| "loss": 0.2876, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.49422080510163413, | |
| "grad_norm": 0.2642805191794303, | |
| "learning_rate": 9.874639560909118e-06, | |
| "loss": 0.2855, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.502192108409725, | |
| "grad_norm": 0.21570422679762521, | |
| "learning_rate": 9.864113286481237e-06, | |
| "loss": 0.2848, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.5101634117178159, | |
| "grad_norm": 0.20704843013033902, | |
| "learning_rate": 9.853168783662028e-06, | |
| "loss": 0.2873, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5181347150259067, | |
| "grad_norm": 0.18056431665581077, | |
| "learning_rate": 9.841806993486686e-06, | |
| "loss": 0.2839, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5261060183339976, | |
| "grad_norm": 0.2105787281096281, | |
| "learning_rate": 9.830028892869804e-06, | |
| "loss": 0.2813, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5340773216420884, | |
| "grad_norm": 0.21257758348049322, | |
| "learning_rate": 9.81783549452136e-06, | |
| "loss": 0.2846, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5420486249501794, | |
| "grad_norm": 0.21961213318400993, | |
| "learning_rate": 9.805227846859652e-06, | |
| "loss": 0.2829, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5500199282582702, | |
| "grad_norm": 0.25199803254623604, | |
| "learning_rate": 9.792207033921152e-06, | |
| "loss": 0.2883, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5579912315663611, | |
| "grad_norm": 0.20221809778648672, | |
| "learning_rate": 9.778774175267294e-06, | |
| "loss": 0.2842, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.565962534874452, | |
| "grad_norm": 0.1898841298780325, | |
| "learning_rate": 9.764930425888216e-06, | |
| "loss": 0.282, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5739338381825428, | |
| "grad_norm": 0.20117780815968267, | |
| "learning_rate": 9.750676976103444e-06, | |
| "loss": 0.2839, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.5819051414906338, | |
| "grad_norm": 0.21755712645273997, | |
| "learning_rate": 9.736015051459551e-06, | |
| "loss": 0.2819, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.5898764447987246, | |
| "grad_norm": 0.20291021122674352, | |
| "learning_rate": 9.720945912624783e-06, | |
| "loss": 0.2836, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5978477481068155, | |
| "grad_norm": 0.20194444409505907, | |
| "learning_rate": 9.705470855280661e-06, | |
| "loss": 0.2833, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.6058190514149063, | |
| "grad_norm": 0.20304767059054613, | |
| "learning_rate": 9.689591210010572e-06, | |
| "loss": 0.2825, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.6137903547229973, | |
| "grad_norm": 0.2648940083523449, | |
| "learning_rate": 9.673308342185366e-06, | |
| "loss": 0.282, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.6217616580310881, | |
| "grad_norm": 0.21769630697418604, | |
| "learning_rate": 9.65662365184596e-06, | |
| "loss": 0.2774, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.629732961339179, | |
| "grad_norm": 0.392188226969322, | |
| "learning_rate": 9.639538573582952e-06, | |
| "loss": 0.2819, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6377042646472698, | |
| "grad_norm": 0.20739255958270508, | |
| "learning_rate": 9.62205457641328e-06, | |
| "loss": 0.2814, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6456755679553607, | |
| "grad_norm": 0.19797948805471874, | |
| "learning_rate": 9.604173163653906e-06, | |
| "loss": 0.2807, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6536468712634516, | |
| "grad_norm": 0.22378074781887625, | |
| "learning_rate": 9.58589587279256e-06, | |
| "loss": 0.2779, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.6616181745715425, | |
| "grad_norm": 0.19730401928407432, | |
| "learning_rate": 9.567224275355538e-06, | |
| "loss": 0.2807, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.6695894778796333, | |
| "grad_norm": 0.19232291589840167, | |
| "learning_rate": 9.548159976772593e-06, | |
| "loss": 0.2803, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.6775607811877242, | |
| "grad_norm": 0.23171459344457662, | |
| "learning_rate": 9.528704616238875e-06, | |
| "loss": 0.2794, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.685532084495815, | |
| "grad_norm": 0.2316345630782031, | |
| "learning_rate": 9.508859866574003e-06, | |
| "loss": 0.2802, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.693503387803906, | |
| "grad_norm": 0.20634122518883097, | |
| "learning_rate": 9.488627434078232e-06, | |
| "loss": 0.2814, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.7014746911119968, | |
| "grad_norm": 0.28423540381597584, | |
| "learning_rate": 9.468009058385735e-06, | |
| "loss": 0.277, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.7094459944200877, | |
| "grad_norm": 0.19838823753708112, | |
| "learning_rate": 9.447006512315025e-06, | |
| "loss": 0.2775, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.7174172977281785, | |
| "grad_norm": 0.1988655247541702, | |
| "learning_rate": 9.425621601716531e-06, | |
| "loss": 0.278, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7253886010362695, | |
| "grad_norm": 0.18011113329739104, | |
| "learning_rate": 9.403856165317322e-06, | |
| "loss": 0.2786, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7333599043443603, | |
| "grad_norm": 0.1844142249739523, | |
| "learning_rate": 9.381712074563006e-06, | |
| "loss": 0.2785, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7413312076524512, | |
| "grad_norm": 0.2148813967605786, | |
| "learning_rate": 9.359191233456821e-06, | |
| "loss": 0.2785, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.749302510960542, | |
| "grad_norm": 0.1883568911372003, | |
| "learning_rate": 9.336295578395927e-06, | |
| "loss": 0.2789, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7572738142686329, | |
| "grad_norm": 0.2340210607240215, | |
| "learning_rate": 9.313027078004903e-06, | |
| "loss": 0.2789, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.7652451175767238, | |
| "grad_norm": 0.19429300696808027, | |
| "learning_rate": 9.289387732966492e-06, | |
| "loss": 0.2788, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.7732164208848147, | |
| "grad_norm": 0.1933521060098035, | |
| "learning_rate": 9.265379575849561e-06, | |
| "loss": 0.2743, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.7811877241929055, | |
| "grad_norm": 0.2008930847687583, | |
| "learning_rate": 9.241004670934348e-06, | |
| "loss": 0.2746, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.7891590275009964, | |
| "grad_norm": 0.20419942608243757, | |
| "learning_rate": 9.216265114034964e-06, | |
| "loss": 0.2761, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.7971303308090872, | |
| "grad_norm": 0.20885521941941515, | |
| "learning_rate": 9.191163032319198e-06, | |
| "loss": 0.2799, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8051016341171782, | |
| "grad_norm": 0.20033581634243633, | |
| "learning_rate": 9.1657005841256e-06, | |
| "loss": 0.2773, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.813072937425269, | |
| "grad_norm": 0.22147195992757276, | |
| "learning_rate": 9.139879958777931e-06, | |
| "loss": 0.275, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.8210442407333599, | |
| "grad_norm": 0.20166804562350057, | |
| "learning_rate": 9.113703376396885e-06, | |
| "loss": 0.2755, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.8290155440414507, | |
| "grad_norm": 0.19260698449209812, | |
| "learning_rate": 9.087173087709226e-06, | |
| "loss": 0.2742, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.8369868473495417, | |
| "grad_norm": 0.2116198417344192, | |
| "learning_rate": 9.060291373854252e-06, | |
| "loss": 0.2749, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.8449581506576325, | |
| "grad_norm": 0.27126785391794345, | |
| "learning_rate": 9.033060546187651e-06, | |
| "loss": 0.2774, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8529294539657234, | |
| "grad_norm": 0.2361830324015449, | |
| "learning_rate": 9.005482946082784e-06, | |
| "loss": 0.2724, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.8609007572738143, | |
| "grad_norm": 0.23551093980233517, | |
| "learning_rate": 8.97756094472935e-06, | |
| "loss": 0.2762, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.8688720605819051, | |
| "grad_norm": 0.21298565207438697, | |
| "learning_rate": 8.949296942929515e-06, | |
| "loss": 0.2753, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.8768433638899961, | |
| "grad_norm": 0.20749916709788005, | |
| "learning_rate": 8.92069337089148e-06, | |
| "loss": 0.278, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8848146671980869, | |
| "grad_norm": 0.17956706004286632, | |
| "learning_rate": 8.891752688020532e-06, | |
| "loss": 0.2775, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.8927859705061778, | |
| "grad_norm": 0.1953798521876003, | |
| "learning_rate": 8.862477382707569e-06, | |
| "loss": 0.2741, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.9007572738142686, | |
| "grad_norm": 0.2294774184802095, | |
| "learning_rate": 8.832869972115148e-06, | |
| "loss": 0.2736, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.9087285771223595, | |
| "grad_norm": 0.18067067247270632, | |
| "learning_rate": 8.802933001961058e-06, | |
| "loss": 0.2737, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.9166998804304504, | |
| "grad_norm": 0.2146301016081478, | |
| "learning_rate": 8.77266904629942e-06, | |
| "loss": 0.2733, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.9246711837385413, | |
| "grad_norm": 0.18563813225207978, | |
| "learning_rate": 8.742080707299381e-06, | |
| "loss": 0.2734, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.9326424870466321, | |
| "grad_norm": 0.19921223017975606, | |
| "learning_rate": 8.71117061502135e-06, | |
| "loss": 0.2762, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.940613790354723, | |
| "grad_norm": 0.19733160963975976, | |
| "learning_rate": 8.679941427190884e-06, | |
| "loss": 0.275, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.9485850936628138, | |
| "grad_norm": 0.20922845272298418, | |
| "learning_rate": 8.64839582897015e-06, | |
| "loss": 0.2712, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.9565563969709048, | |
| "grad_norm": 0.17679207911764114, | |
| "learning_rate": 8.616536532727062e-06, | |
| "loss": 0.2716, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9645277002789956, | |
| "grad_norm": 0.1871171741554302, | |
| "learning_rate": 8.584366277802057e-06, | |
| "loss": 0.271, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.9724990035870865, | |
| "grad_norm": 0.20200784391473173, | |
| "learning_rate": 8.55188783027256e-06, | |
| "loss": 0.2722, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.9804703068951773, | |
| "grad_norm": 0.2060568121760763, | |
| "learning_rate": 8.519103982715158e-06, | |
| "loss": 0.2695, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.9884416102032683, | |
| "grad_norm": 0.19358577088132986, | |
| "learning_rate": 8.486017553965475e-06, | |
| "loss": 0.2701, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.9964129135113591, | |
| "grad_norm": 0.2434707357861983, | |
| "learning_rate": 8.452631388875814e-06, | |
| "loss": 0.2731, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.0039856516540455, | |
| "grad_norm": 0.2616385036392448, | |
| "learning_rate": 8.418948358070535e-06, | |
| "loss": 0.2665, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.0119569549621363, | |
| "grad_norm": 0.2115757131765118, | |
| "learning_rate": 8.384971357699255e-06, | |
| "loss": 0.2627, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.0199282582702272, | |
| "grad_norm": 0.20239812309837896, | |
| "learning_rate": 8.3507033091878e-06, | |
| "loss": 0.2662, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.027899561578318, | |
| "grad_norm": 0.20274705613036875, | |
| "learning_rate": 8.316147158987036e-06, | |
| "loss": 0.2637, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.035870864886409, | |
| "grad_norm": 0.1854664643809721, | |
| "learning_rate": 8.281305878319519e-06, | |
| "loss": 0.2627, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.0438421681944998, | |
| "grad_norm": 0.20204039521748993, | |
| "learning_rate": 8.246182462924022e-06, | |
| "loss": 0.2625, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.0518134715025906, | |
| "grad_norm": 0.21618960118522135, | |
| "learning_rate": 8.210779932797954e-06, | |
| "loss": 0.2693, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.0597847748106815, | |
| "grad_norm": 0.178361480629425, | |
| "learning_rate": 8.175101331937692e-06, | |
| "loss": 0.261, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.0677560781187725, | |
| "grad_norm": 0.25878248216098365, | |
| "learning_rate": 8.139149728076852e-06, | |
| "loss": 0.2634, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.0757273814268633, | |
| "grad_norm": 0.2033185860677756, | |
| "learning_rate": 8.102928212422519e-06, | |
| "loss": 0.2646, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.0836986847349541, | |
| "grad_norm": 0.2143942468103035, | |
| "learning_rate": 8.066439899389451e-06, | |
| "loss": 0.264, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.091669988043045, | |
| "grad_norm": 0.18331709491785228, | |
| "learning_rate": 8.02968792633231e-06, | |
| "loss": 0.2646, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.099641291351136, | |
| "grad_norm": 0.19827283155969086, | |
| "learning_rate": 7.99267545327588e-06, | |
| "loss": 0.2648, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.1076125946592268, | |
| "grad_norm": 0.24364988811035662, | |
| "learning_rate": 7.955405662643384e-06, | |
| "loss": 0.2601, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.1155838979673176, | |
| "grad_norm": 0.19923690656335546, | |
| "learning_rate": 7.917881758982838e-06, | |
| "loss": 0.2638, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.1235552012754084, | |
| "grad_norm": 0.19790386629961096, | |
| "learning_rate": 7.880106968691516e-06, | |
| "loss": 0.2647, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.1315265045834995, | |
| "grad_norm": 0.209982262643357, | |
| "learning_rate": 7.842084539738547e-06, | |
| "loss": 0.2629, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.1394978078915903, | |
| "grad_norm": 0.1865353794893146, | |
| "learning_rate": 7.803817741385636e-06, | |
| "loss": 0.2622, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.147469111199681, | |
| "grad_norm": 0.18591402004381719, | |
| "learning_rate": 7.765309863905965e-06, | |
| "loss": 0.2638, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.1554404145077721, | |
| "grad_norm": 0.22287948038133532, | |
| "learning_rate": 7.726564218301298e-06, | |
| "loss": 0.2658, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.163411717815863, | |
| "grad_norm": 0.18704477851949775, | |
| "learning_rate": 7.68758413601728e-06, | |
| "loss": 0.2632, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.1713830211239538, | |
| "grad_norm": 0.18630909945867757, | |
| "learning_rate": 7.648372968656995e-06, | |
| "loss": 0.2629, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.1793543244320446, | |
| "grad_norm": 0.5386080569158398, | |
| "learning_rate": 7.608934087692794e-06, | |
| "loss": 0.2612, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.1873256277401354, | |
| "grad_norm": 0.1746043720739328, | |
| "learning_rate": 7.569270884176401e-06, | |
| "loss": 0.2609, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.1952969310482264, | |
| "grad_norm": 0.19541388116589287, | |
| "learning_rate": 7.529386768447342e-06, | |
| "loss": 0.2642, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.2032682343563172, | |
| "grad_norm": 0.2015022076748336, | |
| "learning_rate": 7.4892851698397174e-06, | |
| "loss": 0.2638, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.211239537664408, | |
| "grad_norm": 0.18974402819968988, | |
| "learning_rate": 7.448969536387339e-06, | |
| "loss": 0.2617, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.219210840972499, | |
| "grad_norm": 0.19142664444724963, | |
| "learning_rate": 7.408443334527257e-06, | |
| "loss": 0.2644, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.22718214428059, | |
| "grad_norm": 0.18678354662021696, | |
| "learning_rate": 7.367710048801715e-06, | |
| "loss": 0.26, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.2351534475886807, | |
| "grad_norm": 0.20449677931821042, | |
| "learning_rate": 7.326773181558532e-06, | |
| "loss": 0.2593, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.2431247508967715, | |
| "grad_norm": 0.17656320872206302, | |
| "learning_rate": 7.285636252649965e-06, | |
| "loss": 0.2629, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.2510960542048624, | |
| "grad_norm": 0.18132917187985256, | |
| "learning_rate": 7.244302799130064e-06, | |
| "loss": 0.2632, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.2590673575129534, | |
| "grad_norm": 0.19148004299294089, | |
| "learning_rate": 7.202776374950549e-06, | |
| "loss": 0.262, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.2670386608210442, | |
| "grad_norm": 0.17407075104632905, | |
| "learning_rate": 7.161060550655227e-06, | |
| "loss": 0.2614, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.275009964129135, | |
| "grad_norm": 0.1805208863871377, | |
| "learning_rate": 7.119158913072996e-06, | |
| "loss": 0.2602, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.282981267437226, | |
| "grad_norm": 0.1700912662455128, | |
| "learning_rate": 7.0770750650094335e-06, | |
| "loss": 0.2601, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.2909525707453169, | |
| "grad_norm": 0.20872692606059245, | |
| "learning_rate": 7.03481262493702e-06, | |
| "loss": 0.2623, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.2989238740534077, | |
| "grad_norm": 0.18231553621037966, | |
| "learning_rate": 6.992375226684016e-06, | |
| "loss": 0.2612, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.3068951773614987, | |
| "grad_norm": 0.3260893903118034, | |
| "learning_rate": 6.949766519122021e-06, | |
| "loss": 0.2593, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.3148664806695896, | |
| "grad_norm": 0.1837719103909305, | |
| "learning_rate": 6.906990165852218e-06, | |
| "loss": 0.2631, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.3228377839776804, | |
| "grad_norm": 0.17385746180137812, | |
| "learning_rate": 6.864049844890389e-06, | |
| "loss": 0.2601, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.3308090872857712, | |
| "grad_norm": 0.18767118916567374, | |
| "learning_rate": 6.820949248350653e-06, | |
| "loss": 0.2599, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.338780390593862, | |
| "grad_norm": 0.18523703670706165, | |
| "learning_rate": 6.777692082128024e-06, | |
| "loss": 0.2611, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.346751693901953, | |
| "grad_norm": 0.17761446472146364, | |
| "learning_rate": 6.734282065579757e-06, | |
| "loss": 0.2596, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.3547229972100439, | |
| "grad_norm": 0.20263313570108268, | |
| "learning_rate": 6.690722931205551e-06, | |
| "loss": 0.2579, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.3626943005181347, | |
| "grad_norm": 0.19596135949066837, | |
| "learning_rate": 6.6470184243266235e-06, | |
| "loss": 0.2594, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.3706656038262257, | |
| "grad_norm": 0.17389328177855742, | |
| "learning_rate": 6.6031723027636775e-06, | |
| "loss": 0.2601, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.3786369071343165, | |
| "grad_norm": 0.17594519271532788, | |
| "learning_rate": 6.559188336513794e-06, | |
| "loss": 0.2609, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.3866082104424073, | |
| "grad_norm": 0.17131298098330294, | |
| "learning_rate": 6.515070307426279e-06, | |
| "loss": 0.2639, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.3945795137504982, | |
| "grad_norm": 0.18366655818427982, | |
| "learning_rate": 6.470822008877482e-06, | |
| "loss": 0.2643, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.402550817058589, | |
| "grad_norm": 0.16290637917636525, | |
| "learning_rate": 6.4264472454446535e-06, | |
| "loss": 0.2589, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.41052212036668, | |
| "grad_norm": 0.1923343830812872, | |
| "learning_rate": 6.381949832578796e-06, | |
| "loss": 0.2597, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.4184934236747708, | |
| "grad_norm": 0.16772495714207045, | |
| "learning_rate": 6.337333596276613e-06, | |
| "loss": 0.2605, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.4264647269828616, | |
| "grad_norm": 0.18790213961542337, | |
| "learning_rate": 6.292602372751536e-06, | |
| "loss": 0.2604, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.4344360302909527, | |
| "grad_norm": 0.18406526632785894, | |
| "learning_rate": 6.247760008103889e-06, | |
| "loss": 0.2607, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.4424073335990435, | |
| "grad_norm": 0.18222410408713163, | |
| "learning_rate": 6.2028103579901725e-06, | |
| "loss": 0.2615, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.4503786369071343, | |
| "grad_norm": 0.18828145882782565, | |
| "learning_rate": 6.157757287291557e-06, | |
| "loss": 0.2614, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.4583499402152251, | |
| "grad_norm": 0.16998195784557607, | |
| "learning_rate": 6.112604669781572e-06, | |
| "loss": 0.2581, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.466321243523316, | |
| "grad_norm": 0.16771798799179155, | |
| "learning_rate": 6.0673563877930244e-06, | |
| "loss": 0.259, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.474292546831407, | |
| "grad_norm": 0.17650338257347983, | |
| "learning_rate": 6.022016331884185e-06, | |
| "loss": 0.2611, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.4822638501394978, | |
| "grad_norm": 0.1883582677936213, | |
| "learning_rate": 5.9765884005042725e-06, | |
| "loss": 0.2577, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.4902351534475886, | |
| "grad_norm": 0.1903059355062031, | |
| "learning_rate": 5.931076499658258e-06, | |
| "loss": 0.2561, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.4982064567556796, | |
| "grad_norm": 0.170261118553718, | |
| "learning_rate": 5.8854845425710085e-06, | |
| "loss": 0.2574, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.5061777600637705, | |
| "grad_norm": 0.1693099595303142, | |
| "learning_rate": 5.839816449350824e-06, | |
| "loss": 0.2603, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.5141490633718613, | |
| "grad_norm": 0.1818861367650445, | |
| "learning_rate": 5.7940761466523795e-06, | |
| "loss": 0.2648, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.5221203666799523, | |
| "grad_norm": 0.21748921810474134, | |
| "learning_rate": 5.748267567339093e-06, | |
| "loss": 0.2555, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.530091669988043, | |
| "grad_norm": 0.1903700684723453, | |
| "learning_rate": 5.702394650144975e-06, | |
| "loss": 0.2602, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.538062973296134, | |
| "grad_norm": 0.17836978417633012, | |
| "learning_rate": 5.656461339335968e-06, | |
| "loss": 0.2577, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.5460342766042248, | |
| "grad_norm": 0.1627820959396175, | |
| "learning_rate": 5.6104715843708e-06, | |
| "loss": 0.2611, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.5540055799123156, | |
| "grad_norm": 0.15537918892349203, | |
| "learning_rate": 5.564429339561411e-06, | |
| "loss": 0.2592, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.5619768832204066, | |
| "grad_norm": 0.1692865509471413, | |
| "learning_rate": 5.518338563732945e-06, | |
| "loss": 0.2557, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.5699481865284974, | |
| "grad_norm": 0.18872166066900145, | |
| "learning_rate": 5.4722032198833595e-06, | |
| "loss": 0.2597, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.5779194898365883, | |
| "grad_norm": 0.16793201436135893, | |
| "learning_rate": 5.426027274842683e-06, | |
| "loss": 0.2612, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.5858907931446793, | |
| "grad_norm": 0.1614692177614517, | |
| "learning_rate": 5.379814698931935e-06, | |
| "loss": 0.257, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.5938620964527699, | |
| "grad_norm": 0.17158275465453685, | |
| "learning_rate": 5.3335694656217405e-06, | |
| "loss": 0.2604, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.601833399760861, | |
| "grad_norm": 0.16178053730940672, | |
| "learning_rate": 5.2872955511906974e-06, | |
| "loss": 0.258, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.6098047030689517, | |
| "grad_norm": 0.17144071756850604, | |
| "learning_rate": 5.2409969343834675e-06, | |
| "loss": 0.2596, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.6177760063770426, | |
| "grad_norm": 0.16798857484611837, | |
| "learning_rate": 5.194677596068689e-06, | |
| "loss": 0.2598, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.6257473096851336, | |
| "grad_norm": 0.16245877571502684, | |
| "learning_rate": 5.1483415188966855e-06, | |
| "loss": 0.2621, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.6337186129932244, | |
| "grad_norm": 0.22338180692114515, | |
| "learning_rate": 5.101992686957028e-06, | |
| "loss": 0.2579, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.6416899163013152, | |
| "grad_norm": 0.1777225738766675, | |
| "learning_rate": 5.055635085435972e-06, | |
| "loss": 0.2559, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.6496612196094063, | |
| "grad_norm": 0.17571885043922136, | |
| "learning_rate": 5.009272700273804e-06, | |
| "loss": 0.2598, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.6576325229174969, | |
| "grad_norm": 0.16140144516975566, | |
| "learning_rate": 4.962909517822125e-06, | |
| "loss": 0.2555, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.665603826225588, | |
| "grad_norm": 0.18160320235099942, | |
| "learning_rate": 4.91654952450108e-06, | |
| "loss": 0.2559, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.6735751295336787, | |
| "grad_norm": 0.18372880830782032, | |
| "learning_rate": 4.870196706456609e-06, | |
| "loss": 0.262, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.6815464328417695, | |
| "grad_norm": 0.16897256178696948, | |
| "learning_rate": 4.8238550492177065e-06, | |
| "loss": 0.2566, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.6895177361498606, | |
| "grad_norm": 0.16383359612759563, | |
| "learning_rate": 4.777528537353729e-06, | |
| "loss": 0.258, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.6974890394579514, | |
| "grad_norm": 0.16504846683572935, | |
| "learning_rate": 4.7312211541318e-06, | |
| "loss": 0.258, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.7054603427660422, | |
| "grad_norm": 0.18461915132331697, | |
| "learning_rate": 4.684936881174314e-06, | |
| "loss": 0.259, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.7134316460741332, | |
| "grad_norm": 0.18262043780620987, | |
| "learning_rate": 4.638679698116588e-06, | |
| "loss": 0.2597, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.721402949382224, | |
| "grad_norm": 0.17541591405265763, | |
| "learning_rate": 4.592453582264684e-06, | |
| "loss": 0.2554, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.7293742526903149, | |
| "grad_norm": 0.1653040886084589, | |
| "learning_rate": 4.546262508253429e-06, | |
| "loss": 0.2584, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.737345555998406, | |
| "grad_norm": 0.1585102987285458, | |
| "learning_rate": 4.500110447704666e-06, | |
| "loss": 0.2593, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.7453168593064965, | |
| "grad_norm": 0.16189117020291785, | |
| "learning_rate": 4.454001368885764e-06, | |
| "loss": 0.2568, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.7532881626145875, | |
| "grad_norm": 0.1754043030670264, | |
| "learning_rate": 4.40793923636842e-06, | |
| "loss": 0.2557, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.7612594659226783, | |
| "grad_norm": 0.1798179829068267, | |
| "learning_rate": 4.3619280106877716e-06, | |
| "loss": 0.2572, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.7692307692307692, | |
| "grad_norm": 0.16313092179418315, | |
| "learning_rate": 4.315971648001861e-06, | |
| "loss": 0.2556, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.7772020725388602, | |
| "grad_norm": 0.177395532354502, | |
| "learning_rate": 4.270074099751478e-06, | |
| "loss": 0.2542, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.785173375846951, | |
| "grad_norm": 0.16407314616761093, | |
| "learning_rate": 4.224239312320399e-06, | |
| "loss": 0.257, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.7931446791550418, | |
| "grad_norm": 0.1611929306688441, | |
| "learning_rate": 4.178471226696073e-06, | |
| "loss": 0.2572, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.8011159824631329, | |
| "grad_norm": 0.15344135774159312, | |
| "learning_rate": 4.132773778130766e-06, | |
| "loss": 0.2551, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.8090872857712235, | |
| "grad_norm": 0.18018747885039685, | |
| "learning_rate": 4.087150895803192e-06, | |
| "loss": 0.2562, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.8170585890793145, | |
| "grad_norm": 0.16109719534481134, | |
| "learning_rate": 4.041606502480684e-06, | |
| "loss": 0.2544, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.8250298923874053, | |
| "grad_norm": 0.15229386523298538, | |
| "learning_rate": 3.996144514181891e-06, | |
| "loss": 0.254, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.8330011956954961, | |
| "grad_norm": 0.1732832205396586, | |
| "learning_rate": 3.950768839840079e-06, | |
| "loss": 0.2568, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.8409724990035872, | |
| "grad_norm": 0.17030412533612874, | |
| "learning_rate": 3.905483380967027e-06, | |
| "loss": 0.2559, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.848943802311678, | |
| "grad_norm": 0.1646050569595746, | |
| "learning_rate": 3.8602920313175684e-06, | |
| "loss": 0.2584, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.8569151056197688, | |
| "grad_norm": 0.18112487950883321, | |
| "learning_rate": 3.815198676554794e-06, | |
| "loss": 0.2577, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.8648864089278598, | |
| "grad_norm": 0.15970698627462004, | |
| "learning_rate": 3.7702071939159535e-06, | |
| "loss": 0.2574, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.8728577122359504, | |
| "grad_norm": 0.16592497935881761, | |
| "learning_rate": 3.7253214518790814e-06, | |
| "loss": 0.2528, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.8808290155440415, | |
| "grad_norm": 0.1907236587733275, | |
| "learning_rate": 3.6805453098303757e-06, | |
| "loss": 0.2592, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.8888003188521323, | |
| "grad_norm": 0.1636603572774249, | |
| "learning_rate": 3.63588261773236e-06, | |
| "loss": 0.2602, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.896771622160223, | |
| "grad_norm": 0.15180772024085912, | |
| "learning_rate": 3.5913372157928515e-06, | |
| "loss": 0.2563, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.9047429254683141, | |
| "grad_norm": 0.1597945224171933, | |
| "learning_rate": 3.546912934134773e-06, | |
| "loss": 0.2549, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.912714228776405, | |
| "grad_norm": 0.1548083962863071, | |
| "learning_rate": 3.502613592466826e-06, | |
| "loss": 0.2572, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.9206855320844958, | |
| "grad_norm": 0.1481441665132999, | |
| "learning_rate": 3.4584429997550685e-06, | |
| "loss": 0.2575, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.9286568353925868, | |
| "grad_norm": 0.15555262051404722, | |
| "learning_rate": 3.414404953895406e-06, | |
| "loss": 0.2552, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.9366281387006774, | |
| "grad_norm": 0.15133232327459284, | |
| "learning_rate": 3.3705032413870402e-06, | |
| "loss": 0.2539, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.9445994420087684, | |
| "grad_norm": 0.15739551231138196, | |
| "learning_rate": 3.326741637006896e-06, | |
| "loss": 0.2546, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.9525707453168593, | |
| "grad_norm": 0.17129713309093755, | |
| "learning_rate": 3.2831239034850593e-06, | |
| "loss": 0.254, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.96054204862495, | |
| "grad_norm": 0.17872233586244216, | |
| "learning_rate": 3.2396537911812454e-06, | |
| "loss": 0.2535, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.9685133519330411, | |
| "grad_norm": 0.15620956109745782, | |
| "learning_rate": 3.196335037762337e-06, | |
| "loss": 0.2566, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.976484655241132, | |
| "grad_norm": 0.1442172315199126, | |
| "learning_rate": 3.1531713678810076e-06, | |
| "loss": 0.2529, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.9844559585492227, | |
| "grad_norm": 0.15796499558853128, | |
| "learning_rate": 3.110166492855468e-06, | |
| "loss": 0.2551, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.9924272618573138, | |
| "grad_norm": 0.15453646599732415, | |
| "learning_rate": 3.0673241103503572e-06, | |
| "loss": 0.2515, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.1972849604645947, | |
| "learning_rate": 3.0246479040588077e-06, | |
| "loss": 0.2551, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.007971303308091, | |
| "grad_norm": 0.1644857365812511, | |
| "learning_rate": 2.9821415433857174e-06, | |
| "loss": 0.2503, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.0159426066161816, | |
| "grad_norm": 0.15167361754961786, | |
| "learning_rate": 2.939808683132238e-06, | |
| "loss": 0.248, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.0239139099242727, | |
| "grad_norm": 0.15143886743143137, | |
| "learning_rate": 2.897652963181529e-06, | |
| "loss": 0.2475, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.0318852132323637, | |
| "grad_norm": 0.15557561304873513, | |
| "learning_rate": 2.8556780081857966e-06, | |
| "loss": 0.2502, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.0398565165404543, | |
| "grad_norm": 0.14833684638790712, | |
| "learning_rate": 2.813887427254626e-06, | |
| "loss": 0.247, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.0478278198485453, | |
| "grad_norm": 0.15160958104161015, | |
| "learning_rate": 2.772284813644675e-06, | |
| "loss": 0.2485, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.055799123156636, | |
| "grad_norm": 0.1463790587181485, | |
| "learning_rate": 2.7308737444507037e-06, | |
| "loss": 0.2464, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.063770426464727, | |
| "grad_norm": 0.14689650797158363, | |
| "learning_rate": 2.689657780298019e-06, | |
| "loss": 0.2491, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.071741729772818, | |
| "grad_norm": 0.15966093074503412, | |
| "learning_rate": 2.648640465036316e-06, | |
| "loss": 0.2485, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.0797130330809086, | |
| "grad_norm": 0.2092933023915233, | |
| "learning_rate": 2.6078253254349706e-06, | |
| "loss": 0.2468, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.0876843363889996, | |
| "grad_norm": 0.15161101989978912, | |
| "learning_rate": 2.5672158708797953e-06, | |
| "loss": 0.2474, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.0956556396970907, | |
| "grad_norm": 0.13872835429015348, | |
| "learning_rate": 2.526815593071306e-06, | |
| "loss": 0.2479, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.1036269430051813, | |
| "grad_norm": 0.15338466332562303, | |
| "learning_rate": 2.486627965724482e-06, | |
| "loss": 0.2498, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.1115982463132723, | |
| "grad_norm": 0.14140026219879168, | |
| "learning_rate": 2.4466564442700974e-06, | |
| "loss": 0.2498, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.119569549621363, | |
| "grad_norm": 0.15970341507801283, | |
| "learning_rate": 2.406904465557614e-06, | |
| "loss": 0.2466, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.127540852929454, | |
| "grad_norm": 0.1519435398581199, | |
| "learning_rate": 2.3673754475596634e-06, | |
| "loss": 0.2472, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.135512156237545, | |
| "grad_norm": 0.14371921030590182, | |
| "learning_rate": 2.3280727890781753e-06, | |
| "loss": 0.2471, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.1434834595456356, | |
| "grad_norm": 0.1480883729791422, | |
| "learning_rate": 2.2889998694521257e-06, | |
| "loss": 0.2486, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.1514547628537266, | |
| "grad_norm": 0.1635322392602264, | |
| "learning_rate": 2.2501600482669865e-06, | |
| "loss": 0.2503, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.1594260661618176, | |
| "grad_norm": 0.17232127721119667, | |
| "learning_rate": 2.211556665065854e-06, | |
| "loss": 0.2484, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.1673973694699082, | |
| "grad_norm": 0.1480271607060395, | |
| "learning_rate": 2.173193039062299e-06, | |
| "loss": 0.2507, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.1753686727779993, | |
| "grad_norm": 0.1602014965172441, | |
| "learning_rate": 2.1350724688549906e-06, | |
| "loss": 0.2514, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.18333997608609, | |
| "grad_norm": 0.1414641476617348, | |
| "learning_rate": 2.0971982321440553e-06, | |
| "loss": 0.248, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.191311279394181, | |
| "grad_norm": 0.15546919413747778, | |
| "learning_rate": 2.0595735854492675e-06, | |
| "loss": 0.2487, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.199282582702272, | |
| "grad_norm": 0.14859695582253002, | |
| "learning_rate": 2.0222017638300394e-06, | |
| "loss": 0.2469, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.2072538860103625, | |
| "grad_norm": 0.15297411724937537, | |
| "learning_rate": 1.9850859806072576e-06, | |
| "loss": 0.2449, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.2152251893184536, | |
| "grad_norm": 0.3819615108389116, | |
| "learning_rate": 1.9482294270870055e-06, | |
| "loss": 0.2469, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.2231964926265446, | |
| "grad_norm": 0.15939599378059569, | |
| "learning_rate": 1.9116352722861596e-06, | |
| "loss": 0.2472, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.231167795934635, | |
| "grad_norm": 0.1555518437281259, | |
| "learning_rate": 1.8753066626599086e-06, | |
| "loss": 0.2508, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.2391390992427262, | |
| "grad_norm": 0.1420441078873591, | |
| "learning_rate": 1.839246721831215e-06, | |
| "loss": 0.2484, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.247110402550817, | |
| "grad_norm": 0.1436721984643807, | |
| "learning_rate": 1.8034585503222441e-06, | |
| "loss": 0.2469, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.255081705858908, | |
| "grad_norm": 0.14135560041109888, | |
| "learning_rate": 1.7679452252877622e-06, | |
| "loss": 0.2465, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.263053009166999, | |
| "grad_norm": 0.1504613140361942, | |
| "learning_rate": 1.7327098002505681e-06, | |
| "loss": 0.2444, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.2710243124750895, | |
| "grad_norm": 0.17027410047278219, | |
| "learning_rate": 1.6977553048389306e-06, | |
| "loss": 0.2461, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.2789956157831806, | |
| "grad_norm": 0.1715439427086149, | |
| "learning_rate": 1.663084744526105e-06, | |
| "loss": 0.2481, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.2869669190912716, | |
| "grad_norm": 0.147378563017796, | |
| "learning_rate": 1.6287011003719105e-06, | |
| "loss": 0.2452, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.294938222399362, | |
| "grad_norm": 0.13827676791752133, | |
| "learning_rate": 1.5946073287664065e-06, | |
| "loss": 0.2492, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.302909525707453, | |
| "grad_norm": 0.15079331024865258, | |
| "learning_rate": 1.5608063611757058e-06, | |
| "loss": 0.249, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.3108808290155443, | |
| "grad_norm": 0.14708063904393764, | |
| "learning_rate": 1.5273011038899066e-06, | |
| "loss": 0.2514, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.318852132323635, | |
| "grad_norm": 0.14020201643369284, | |
| "learning_rate": 1.4940944377732168e-06, | |
| "loss": 0.2477, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 2.326823435631726, | |
| "grad_norm": 0.1528037921936168, | |
| "learning_rate": 1.4611892180162407e-06, | |
| "loss": 0.2469, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.3347947389398165, | |
| "grad_norm": 0.13915360250390427, | |
| "learning_rate": 1.4285882738904822e-06, | |
| "loss": 0.2468, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 2.3427660422479075, | |
| "grad_norm": 0.14250210180453843, | |
| "learning_rate": 1.3962944085050833e-06, | |
| "loss": 0.248, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 2.3507373455559986, | |
| "grad_norm": 0.15103609439825436, | |
| "learning_rate": 1.3643103985658047e-06, | |
| "loss": 0.2471, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.358708648864089, | |
| "grad_norm": 0.1343269921202141, | |
| "learning_rate": 1.332638994136269e-06, | |
| "loss": 0.2446, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.36667995217218, | |
| "grad_norm": 0.15449698277173599, | |
| "learning_rate": 1.301282918401518e-06, | |
| "loss": 0.2444, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 2.374651255480271, | |
| "grad_norm": 0.1445514536177799, | |
| "learning_rate": 1.270244867433853e-06, | |
| "loss": 0.2512, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.382622558788362, | |
| "grad_norm": 0.15118149213707782, | |
| "learning_rate": 1.2395275099610272e-06, | |
| "loss": 0.2495, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 2.390593862096453, | |
| "grad_norm": 0.13886439394677316, | |
| "learning_rate": 1.2091334871367838e-06, | |
| "loss": 0.246, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.3985651654045435, | |
| "grad_norm": 0.13698163877896719, | |
| "learning_rate": 1.1790654123137552e-06, | |
| "loss": 0.2487, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 2.4065364687126345, | |
| "grad_norm": 0.13611193490421775, | |
| "learning_rate": 1.1493258708187677e-06, | |
| "loss": 0.2462, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 2.4145077720207255, | |
| "grad_norm": 0.13752762254360268, | |
| "learning_rate": 1.1199174197305473e-06, | |
| "loss": 0.2464, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 2.422479075328816, | |
| "grad_norm": 0.13534708608338716, | |
| "learning_rate": 1.0908425876598512e-06, | |
| "loss": 0.2471, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 2.430450378636907, | |
| "grad_norm": 0.14072307465294032, | |
| "learning_rate": 1.0621038745320579e-06, | |
| "loss": 0.2507, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.438421681944998, | |
| "grad_norm": 0.18149670536468343, | |
| "learning_rate": 1.0337037513722154e-06, | |
| "loss": 0.2468, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 2.446392985253089, | |
| "grad_norm": 0.13777290158405223, | |
| "learning_rate": 1.0056446600925718e-06, | |
| "loss": 0.2467, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 2.45436428856118, | |
| "grad_norm": 0.13677789276728541, | |
| "learning_rate": 9.779290132826224e-07, | |
| "loss": 0.2481, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 2.4623355918692704, | |
| "grad_norm": 0.14535155203152114, | |
| "learning_rate": 9.505591940016601e-07, | |
| "loss": 0.246, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 2.4703068951773615, | |
| "grad_norm": 0.13976318624658268, | |
| "learning_rate": 9.235375555738824e-07, | |
| "loss": 0.2463, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.4782781984854525, | |
| "grad_norm": 0.13568166041286991, | |
| "learning_rate": 8.968664213860417e-07, | |
| "loss": 0.2472, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 2.486249501793543, | |
| "grad_norm": 0.13510001777472666, | |
| "learning_rate": 8.705480846876746e-07, | |
| "loss": 0.2453, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 2.494220805101634, | |
| "grad_norm": 0.14225142264009744, | |
| "learning_rate": 8.445848083939267e-07, | |
| "loss": 0.2466, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 2.5021921084097247, | |
| "grad_norm": 0.13719258420894617, | |
| "learning_rate": 8.189788248909763e-07, | |
| "loss": 0.246, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 2.5101634117178158, | |
| "grad_norm": 0.13873842823619842, | |
| "learning_rate": 7.937323358440935e-07, | |
| "loss": 0.2466, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.518134715025907, | |
| "grad_norm": 0.15089712483314988, | |
| "learning_rate": 7.688475120083349e-07, | |
| "loss": 0.2477, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 2.526106018333998, | |
| "grad_norm": 0.13447979411026342, | |
| "learning_rate": 7.443264930418886e-07, | |
| "loss": 0.2463, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 2.5340773216420884, | |
| "grad_norm": 0.1380057513415994, | |
| "learning_rate": 7.201713873221134e-07, | |
| "loss": 0.2495, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 2.5420486249501795, | |
| "grad_norm": 0.14450360624810707, | |
| "learning_rate": 6.963842717642488e-07, | |
| "loss": 0.2499, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 2.55001992825827, | |
| "grad_norm": 0.2060655057691808, | |
| "learning_rate": 6.72967191642836e-07, | |
| "loss": 0.247, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.557991231566361, | |
| "grad_norm": 0.12851695195695914, | |
| "learning_rate": 6.499221604158623e-07, | |
| "loss": 0.246, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 2.565962534874452, | |
| "grad_norm": 0.16068250540689558, | |
| "learning_rate": 6.2725115955164e-07, | |
| "loss": 0.2457, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 2.5739338381825427, | |
| "grad_norm": 0.1362948462489877, | |
| "learning_rate": 6.049561383584301e-07, | |
| "loss": 0.2485, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 2.5819051414906338, | |
| "grad_norm": 0.14318195107122192, | |
| "learning_rate": 5.830390138168435e-07, | |
| "loss": 0.2457, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 2.5898764447987244, | |
| "grad_norm": 0.15160361757805285, | |
| "learning_rate": 5.615016704150056e-07, | |
| "loss": 0.2437, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.5978477481068154, | |
| "grad_norm": 0.14208542489599402, | |
| "learning_rate": 5.403459599865307e-07, | |
| "loss": 0.2456, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 2.6058190514149064, | |
| "grad_norm": 0.1342083762390786, | |
| "learning_rate": 5.195737015512947e-07, | |
| "loss": 0.2449, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 2.6137903547229975, | |
| "grad_norm": 0.12996044889937128, | |
| "learning_rate": 4.991866811590268e-07, | |
| "loss": 0.2482, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 2.621761658031088, | |
| "grad_norm": 0.13276419525758598, | |
| "learning_rate": 4.791866517357491e-07, | |
| "loss": 0.2478, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 2.629732961339179, | |
| "grad_norm": 0.13410510712928858, | |
| "learning_rate": 4.5957533293304655e-07, | |
| "loss": 0.2465, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.6377042646472697, | |
| "grad_norm": 0.13836647678174321, | |
| "learning_rate": 4.403544109802144e-07, | |
| "loss": 0.2446, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 2.6456755679553607, | |
| "grad_norm": 0.14173844811223874, | |
| "learning_rate": 4.2152553853926914e-07, | |
| "loss": 0.2467, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 2.653646871263452, | |
| "grad_norm": 0.12963202838388832, | |
| "learning_rate": 4.0309033456284565e-07, | |
| "loss": 0.2461, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 2.6616181745715424, | |
| "grad_norm": 0.12985066342865778, | |
| "learning_rate": 3.850503841550024e-07, | |
| "loss": 0.2441, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 2.6695894778796334, | |
| "grad_norm": 0.13686704000194505, | |
| "learning_rate": 3.674072384349242e-07, | |
| "loss": 0.2494, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.677560781187724, | |
| "grad_norm": 0.13198096936683557, | |
| "learning_rate": 3.501624144035559e-07, | |
| "loss": 0.2466, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 2.685532084495815, | |
| "grad_norm": 0.13285862104009824, | |
| "learning_rate": 3.333173948131663e-07, | |
| "loss": 0.249, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 2.693503387803906, | |
| "grad_norm": 0.13123763580997755, | |
| "learning_rate": 3.1687362803985987e-07, | |
| "loss": 0.2485, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.7014746911119967, | |
| "grad_norm": 0.130635888923708, | |
| "learning_rate": 3.008325279590357e-07, | |
| "loss": 0.2464, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.7094459944200877, | |
| "grad_norm": 0.12903560375175332, | |
| "learning_rate": 2.851954738238277e-07, | |
| "loss": 0.2464, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.7174172977281783, | |
| "grad_norm": 0.13068172271012624, | |
| "learning_rate": 2.6996381014650353e-07, | |
| "loss": 0.2477, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.7253886010362693, | |
| "grad_norm": 0.12948155434817346, | |
| "learning_rate": 2.5513884658286745e-07, | |
| "loss": 0.2454, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.7333599043443604, | |
| "grad_norm": 0.13478087793899063, | |
| "learning_rate": 2.407218578196524e-07, | |
| "loss": 0.2446, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.7413312076524514, | |
| "grad_norm": 0.14200513684004365, | |
| "learning_rate": 2.267140834649123e-07, | |
| "loss": 0.2475, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.749302510960542, | |
| "grad_norm": 0.1388036591016315, | |
| "learning_rate": 2.13116727941447e-07, | |
| "loss": 0.2458, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.757273814268633, | |
| "grad_norm": 0.13274417766749047, | |
| "learning_rate": 1.9993096038323556e-07, | |
| "loss": 0.2478, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.7652451175767236, | |
| "grad_norm": 0.1309898112310216, | |
| "learning_rate": 1.8715791453491562e-07, | |
| "loss": 0.2473, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.7732164208848147, | |
| "grad_norm": 0.12855050002466045, | |
| "learning_rate": 1.7479868865430072e-07, | |
| "loss": 0.246, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.7811877241929057, | |
| "grad_norm": 0.1297939051422684, | |
| "learning_rate": 1.6285434541794598e-07, | |
| "loss": 0.2454, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.7891590275009963, | |
| "grad_norm": 0.1360452914842712, | |
| "learning_rate": 1.5132591182978107e-07, | |
| "loss": 0.2478, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.7971303308090874, | |
| "grad_norm": 0.1303778236512888, | |
| "learning_rate": 1.4021437913280366e-07, | |
| "loss": 0.2473, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 2.805101634117178, | |
| "grad_norm": 0.12494111165590116, | |
| "learning_rate": 1.2952070272384986e-07, | |
| "loss": 0.2472, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.813072937425269, | |
| "grad_norm": 0.13077601667810976, | |
| "learning_rate": 1.192458020714482e-07, | |
| "loss": 0.2447, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 2.82104424073336, | |
| "grad_norm": 0.13202872119004214, | |
| "learning_rate": 1.0939056063675846e-07, | |
| "loss": 0.2467, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.8290155440414506, | |
| "grad_norm": 0.1282605177841322, | |
| "learning_rate": 9.995582579761243e-08, | |
| "loss": 0.2479, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.8369868473495417, | |
| "grad_norm": 0.12641719729667697, | |
| "learning_rate": 9.094240877565441e-08, | |
| "loss": 0.2473, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 2.8449581506576322, | |
| "grad_norm": 0.133124543261481, | |
| "learning_rate": 8.235108456658814e-08, | |
| "loss": 0.2456, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 2.8529294539657233, | |
| "grad_norm": 0.126398551708617, | |
| "learning_rate": 7.418259187354227e-08, | |
| "loss": 0.248, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 2.8609007572738143, | |
| "grad_norm": 0.12941811506345907, | |
| "learning_rate": 6.643763304355566e-08, | |
| "loss": 0.2465, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 2.8688720605819054, | |
| "grad_norm": 0.12633613236354346, | |
| "learning_rate": 5.911687400718458e-08, | |
| "loss": 0.2484, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.876843363889996, | |
| "grad_norm": 0.13027882462021592, | |
| "learning_rate": 5.222094422124846e-08, | |
| "loss": 0.2478, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 2.884814667198087, | |
| "grad_norm": 0.130436804547978, | |
| "learning_rate": 4.57504366147038e-08, | |
| "loss": 0.2502, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 2.8927859705061776, | |
| "grad_norm": 0.13485396495421867, | |
| "learning_rate": 3.970590753766712e-08, | |
| "loss": 0.2475, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 2.9007572738142686, | |
| "grad_norm": 0.1357506612910079, | |
| "learning_rate": 3.408787671357494e-08, | |
| "loss": 0.2485, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 2.9087285771223597, | |
| "grad_norm": 0.1324337960159484, | |
| "learning_rate": 2.8896827194496713e-08, | |
| "loss": 0.2507, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.9166998804304503, | |
| "grad_norm": 0.1384258646412873, | |
| "learning_rate": 2.4133205319603614e-08, | |
| "loss": 0.2471, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 2.9246711837385413, | |
| "grad_norm": 0.12538463682333836, | |
| "learning_rate": 1.9797420676788692e-08, | |
| "loss": 0.2465, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 2.932642487046632, | |
| "grad_norm": 0.13488887387214157, | |
| "learning_rate": 1.5889846067450586e-08, | |
| "loss": 0.2473, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 2.940613790354723, | |
| "grad_norm": 0.1414038209388306, | |
| "learning_rate": 1.241081747443862e-08, | |
| "loss": 0.2481, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 2.948585093662814, | |
| "grad_norm": 0.13040100044435768, | |
| "learning_rate": 9.36063403316534e-09, | |
| "loss": 0.2463, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.956556396970905, | |
| "grad_norm": 0.12638246739069472, | |
| "learning_rate": 6.739558005884883e-09, | |
| "loss": 0.2465, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 2.9645277002789956, | |
| "grad_norm": 0.13324839473704572, | |
| "learning_rate": 4.547814759142122e-09, | |
| "loss": 0.2505, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 2.9724990035870866, | |
| "grad_norm": 0.1305883337577235, | |
| "learning_rate": 2.785592744398713e-09, | |
| "loss": 0.2478, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 2.9804703068951772, | |
| "grad_norm": 0.13393956028092843, | |
| "learning_rate": 1.453043481824401e-09, | |
| "loss": 0.2459, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 2.9884416102032683, | |
| "grad_norm": 0.13005754003113282, | |
| "learning_rate": 5.50281547275211e-10, | |
| "loss": 0.2488, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.9964129135113593, | |
| "grad_norm": 0.13834661200938075, | |
| "learning_rate": 7.738456243466808e-11, | |
| "loss": 0.2472, | |
| "step": 3760 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3765, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 10000000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.647275000450253e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |