| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9998683627921449, |
| "eval_steps": 500, |
| "global_step": 2611, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0038294460466953076, |
| "grad_norm": 5.426074325401742, |
| "learning_rate": 5.69620253164557e-07, |
| "loss": 0.596, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.007658892093390615, |
| "grad_norm": 1.7300049140411797, |
| "learning_rate": 1.2025316455696204e-06, |
| "loss": 0.4935, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.011488338140085923, |
| "grad_norm": 1.0566614384820672, |
| "learning_rate": 1.8354430379746838e-06, |
| "loss": 0.4179, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01531778418678123, |
| "grad_norm": 1.1239284474445808, |
| "learning_rate": 2.4683544303797473e-06, |
| "loss": 0.3786, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.019147230233476538, |
| "grad_norm": 0.8236458977275282, |
| "learning_rate": 3.10126582278481e-06, |
| "loss": 0.3645, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.022976676280171845, |
| "grad_norm": 0.939010895376297, |
| "learning_rate": 3.7341772151898737e-06, |
| "loss": 0.3472, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.026806122326867153, |
| "grad_norm": 0.7558771041067948, |
| "learning_rate": 4.367088607594937e-06, |
| "loss": 0.3234, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03063556837356246, |
| "grad_norm": 0.8507599742006083, |
| "learning_rate": 5e-06, |
| "loss": 0.3273, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03446501442025777, |
| "grad_norm": 0.9322867483149297, |
| "learning_rate": 4.999807568225742e-06, |
| "loss": 0.3269, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.038294460466953076, |
| "grad_norm": 0.8060180679646447, |
| "learning_rate": 4.999230302526956e-06, |
| "loss": 0.338, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04212390651364838, |
| "grad_norm": 0.8178741447804784, |
| "learning_rate": 4.998268291771053e-06, |
| "loss": 0.3232, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04595335256034369, |
| "grad_norm": 0.8264908445738317, |
| "learning_rate": 4.9969216840551815e-06, |
| "loss": 0.3239, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.049782798607039, |
| "grad_norm": 0.8185737508740193, |
| "learning_rate": 4.995190686683432e-06, |
| "loss": 0.3164, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.053612244653734306, |
| "grad_norm": 0.8016123974519153, |
| "learning_rate": 4.9930755661349215e-06, |
| "loss": 0.3227, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.057441690700429614, |
| "grad_norm": 0.8304543418674754, |
| "learning_rate": 4.990576648022768e-06, |
| "loss": 0.3136, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06127113674712492, |
| "grad_norm": 0.7555621229293721, |
| "learning_rate": 4.98769431704397e-06, |
| "loss": 0.3033, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06510058279382024, |
| "grad_norm": 0.8637646507692361, |
| "learning_rate": 4.984429016920178e-06, |
| "loss": 0.3231, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06893002884051554, |
| "grad_norm": 0.7476252312804286, |
| "learning_rate": 4.980781250329389e-06, |
| "loss": 0.309, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07275947488721085, |
| "grad_norm": 0.8589580099130042, |
| "learning_rate": 4.976751578828562e-06, |
| "loss": 0.3122, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07658892093390615, |
| "grad_norm": 0.8355945240964501, |
| "learning_rate": 4.9723406227671645e-06, |
| "loss": 0.3109, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08041836698060147, |
| "grad_norm": 0.7902874463038752, |
| "learning_rate": 4.967549061191679e-06, |
| "loss": 0.3118, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08424781302729677, |
| "grad_norm": 0.8876153745394134, |
| "learning_rate": 4.962377631741061e-06, |
| "loss": 0.306, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08807725907399208, |
| "grad_norm": 0.866679126237035, |
| "learning_rate": 4.956827130533185e-06, |
| "loss": 0.3135, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.09190670512068738, |
| "grad_norm": 0.8629644307536233, |
| "learning_rate": 4.95089841204229e-06, |
| "loss": 0.302, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0957361511673827, |
| "grad_norm": 0.7338565663547403, |
| "learning_rate": 4.9445923889674285e-06, |
| "loss": 0.303, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.099565597214078, |
| "grad_norm": 0.7305791719581167, |
| "learning_rate": 4.937910032091968e-06, |
| "loss": 0.3009, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.10339504326077331, |
| "grad_norm": 0.6695669065750437, |
| "learning_rate": 4.9308523701341415e-06, |
| "loss": 0.305, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.10722448930746861, |
| "grad_norm": 0.7251397159069465, |
| "learning_rate": 4.923420489588677e-06, |
| "loss": 0.3005, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.11105393535416393, |
| "grad_norm": 0.7899848638296046, |
| "learning_rate": 4.915615534559545e-06, |
| "loss": 0.3036, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11488338140085923, |
| "grad_norm": 0.8159662777762657, |
| "learning_rate": 4.907438706583818e-06, |
| "loss": 0.2997, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11871282744755454, |
| "grad_norm": 0.7496451104541622, |
| "learning_rate": 4.898891264446709e-06, |
| "loss": 0.2984, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.12254227349424984, |
| "grad_norm": 0.7090938182919049, |
| "learning_rate": 4.889974523987784e-06, |
| "loss": 0.3037, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.12637171954094514, |
| "grad_norm": 0.7534479964128541, |
| "learning_rate": 4.880689857898392e-06, |
| "loss": 0.2907, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.13020116558764047, |
| "grad_norm": 0.7967215720335111, |
| "learning_rate": 4.871038695510347e-06, |
| "loss": 0.3035, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.13403061163433577, |
| "grad_norm": 0.8589787634591548, |
| "learning_rate": 4.861022522575892e-06, |
| "loss": 0.2917, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.13786005768103107, |
| "grad_norm": 0.77962150007173, |
| "learning_rate": 4.850642881038969e-06, |
| "loss": 0.3019, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.14168950372772637, |
| "grad_norm": 0.6913568705438217, |
| "learning_rate": 4.839901368797849e-06, |
| "loss": 0.2987, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1455189497744217, |
| "grad_norm": 0.7245824514430528, |
| "learning_rate": 4.828799639459139e-06, |
| "loss": 0.2996, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.149348395821117, |
| "grad_norm": 0.7331355223021702, |
| "learning_rate": 4.817339402083217e-06, |
| "loss": 0.2958, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1531778418678123, |
| "grad_norm": 0.684066179750134, |
| "learning_rate": 4.805522420921132e-06, |
| "loss": 0.2923, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1570072879145076, |
| "grad_norm": 0.7273451737034062, |
| "learning_rate": 4.793350515143007e-06, |
| "loss": 0.2955, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.16083673396120293, |
| "grad_norm": 0.6922184014896344, |
| "learning_rate": 4.780825558557981e-06, |
| "loss": 0.3021, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.16466618000789823, |
| "grad_norm": 0.7901617539245519, |
| "learning_rate": 4.767949479325749e-06, |
| "loss": 0.3004, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.16849562605459353, |
| "grad_norm": 0.7269661753924206, |
| "learning_rate": 4.754724259659727e-06, |
| "loss": 0.2966, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.17232507210128883, |
| "grad_norm": 0.7893201123959417, |
| "learning_rate": 4.741151935521906e-06, |
| "loss": 0.2985, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.17615451814798416, |
| "grad_norm": 0.6946007030633581, |
| "learning_rate": 4.727234596309417e-06, |
| "loss": 0.3036, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.17998396419467946, |
| "grad_norm": 0.7322226198234377, |
| "learning_rate": 4.71297438453288e-06, |
| "loss": 0.3001, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.18381341024137476, |
| "grad_norm": 0.6823377208042828, |
| "learning_rate": 4.69837349548658e-06, |
| "loss": 0.2925, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.1876428562880701, |
| "grad_norm": 0.7135771256714908, |
| "learning_rate": 4.683434176910503e-06, |
| "loss": 0.2939, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1914723023347654, |
| "grad_norm": 0.691020843307318, |
| "learning_rate": 4.668158728644315e-06, |
| "loss": 0.2804, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1953017483814607, |
| "grad_norm": 0.6612550303473294, |
| "learning_rate": 4.652549502273305e-06, |
| "loss": 0.2922, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.199131194428156, |
| "grad_norm": 0.7364276724142655, |
| "learning_rate": 4.636608900766372e-06, |
| "loss": 0.2891, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.20296064047485132, |
| "grad_norm": 0.6721354413833053, |
| "learning_rate": 4.620339378106103e-06, |
| "loss": 0.2809, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.20679008652154662, |
| "grad_norm": 0.6653652380595567, |
| "learning_rate": 4.6037434389109855e-06, |
| "loss": 0.2983, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.21061953256824192, |
| "grad_norm": 0.7449501156668481, |
| "learning_rate": 4.586823638049841e-06, |
| "loss": 0.2903, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.21444897861493722, |
| "grad_norm": 0.715508013931221, |
| "learning_rate": 4.569582580248509e-06, |
| "loss": 0.2923, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.21827842466163255, |
| "grad_norm": 0.6641963861933619, |
| "learning_rate": 4.552022919688861e-06, |
| "loss": 0.2924, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.22210787070832785, |
| "grad_norm": 0.7113518461330477, |
| "learning_rate": 4.534147359600211e-06, |
| "loss": 0.2819, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.22593731675502315, |
| "grad_norm": 0.7089839605701392, |
| "learning_rate": 4.515958651843151e-06, |
| "loss": 0.2939, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.22976676280171845, |
| "grad_norm": 0.677078858734863, |
| "learning_rate": 4.497459596485924e-06, |
| "loss": 0.2835, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.23359620884841378, |
| "grad_norm": 0.7703623199956556, |
| "learning_rate": 4.478653041373371e-06, |
| "loss": 0.2854, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.23742565489510908, |
| "grad_norm": 0.6407221872558565, |
| "learning_rate": 4.459541881688501e-06, |
| "loss": 0.2872, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.24125510094180438, |
| "grad_norm": 0.7625245912179776, |
| "learning_rate": 4.440129059506808e-06, |
| "loss": 0.2852, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.24508454698849969, |
| "grad_norm": 0.6533872161632752, |
| "learning_rate": 4.420417563343347e-06, |
| "loss": 0.2883, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.248913993035195, |
| "grad_norm": 0.6347508565680315, |
| "learning_rate": 4.40041042769266e-06, |
| "loss": 0.2818, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2527434390818903, |
| "grad_norm": 0.6956608959261769, |
| "learning_rate": 4.380110732561636e-06, |
| "loss": 0.2858, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2565728851285856, |
| "grad_norm": 0.7174236888577228, |
| "learning_rate": 4.3595216029953575e-06, |
| "loss": 0.2948, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.26040233117528094, |
| "grad_norm": 0.6538209955447881, |
| "learning_rate": 4.338646208596009e-06, |
| "loss": 0.2901, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.2642317772219762, |
| "grad_norm": 0.6777945072272051, |
| "learning_rate": 4.317487763034936e-06, |
| "loss": 0.2848, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.26806122326867154, |
| "grad_norm": 0.6915180680337352, |
| "learning_rate": 4.296049523557917e-06, |
| "loss": 0.294, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2718906693153669, |
| "grad_norm": 0.6811198761407046, |
| "learning_rate": 4.274334790483718e-06, |
| "loss": 0.2925, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.27572011536206215, |
| "grad_norm": 0.6682149146681646, |
| "learning_rate": 4.2523469066960295e-06, |
| "loss": 0.2832, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.2795495614087575, |
| "grad_norm": 0.6343382096231662, |
| "learning_rate": 4.230089257128842e-06, |
| "loss": 0.2865, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.28337900745545275, |
| "grad_norm": 0.7142478024296977, |
| "learning_rate": 4.207565268245356e-06, |
| "loss": 0.2852, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.2872084535021481, |
| "grad_norm": 0.6678411720839094, |
| "learning_rate": 4.184778407510484e-06, |
| "loss": 0.2924, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2910378995488434, |
| "grad_norm": 0.7689563352484293, |
| "learning_rate": 4.16173218285706e-06, |
| "loss": 0.2901, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2948673455955387, |
| "grad_norm": 0.749864633773232, |
| "learning_rate": 4.138430142145805e-06, |
| "loss": 0.2839, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.298696791642234, |
| "grad_norm": 0.6699595192464245, |
| "learning_rate": 4.114875872619147e-06, |
| "loss": 0.2951, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.30252623768892933, |
| "grad_norm": 0.691214596956005, |
| "learning_rate": 4.091073000348989e-06, |
| "loss": 0.2874, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3063556837356246, |
| "grad_norm": 0.6554956219244137, |
| "learning_rate": 4.067025189678485e-06, |
| "loss": 0.286, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.31018512978231993, |
| "grad_norm": 0.6954151666813602, |
| "learning_rate": 4.042736142657936e-06, |
| "loss": 0.2834, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3140145758290152, |
| "grad_norm": 0.7196715435903528, |
| "learning_rate": 4.018209598474869e-06, |
| "loss": 0.284, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.31784402187571054, |
| "grad_norm": 0.7723328668264622, |
| "learning_rate": 3.9934493328784185e-06, |
| "loss": 0.2777, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.32167346792240586, |
| "grad_norm": 0.6919247319317805, |
| "learning_rate": 3.9684591575980546e-06, |
| "loss": 0.2893, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.32550291396910114, |
| "grad_norm": 0.612288507127871, |
| "learning_rate": 3.943242919756792e-06, |
| "loss": 0.2891, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.32933236001579647, |
| "grad_norm": 0.7304106009933916, |
| "learning_rate": 3.917804501278942e-06, |
| "loss": 0.2838, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.3331618060624918, |
| "grad_norm": 0.6961425637816675, |
| "learning_rate": 3.892147818292505e-06, |
| "loss": 0.2818, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.33699125210918707, |
| "grad_norm": 0.6415090586497654, |
| "learning_rate": 3.866276820526305e-06, |
| "loss": 0.2826, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3408206981558824, |
| "grad_norm": 0.7319097656364029, |
| "learning_rate": 3.840195490701943e-06, |
| "loss": 0.2797, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.34465014420257767, |
| "grad_norm": 0.6643906637801983, |
| "learning_rate": 3.8139078439206755e-06, |
| "loss": 0.2823, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.348479590249273, |
| "grad_norm": 0.651442028320178, |
| "learning_rate": 3.787417927045315e-06, |
| "loss": 0.2845, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.3523090362959683, |
| "grad_norm": 0.703399820267242, |
| "learning_rate": 3.760729818077224e-06, |
| "loss": 0.2782, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3561384823426636, |
| "grad_norm": 0.6339374061657803, |
| "learning_rate": 3.7338476255285295e-06, |
| "loss": 0.2809, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.3599679283893589, |
| "grad_norm": 0.6650804278367294, |
| "learning_rate": 3.7067754877896388e-06, |
| "loss": 0.288, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.36379737443605425, |
| "grad_norm": 0.6645625939049019, |
| "learning_rate": 3.6795175724921506e-06, |
| "loss": 0.2821, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.36762682048274953, |
| "grad_norm": 0.6819651400048093, |
| "learning_rate": 3.652078075867267e-06, |
| "loss": 0.2759, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.37145626652944486, |
| "grad_norm": 0.6767426872168217, |
| "learning_rate": 3.624461222099804e-06, |
| "loss": 0.28, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.3752857125761402, |
| "grad_norm": 0.7310278431962209, |
| "learning_rate": 3.596671262677898e-06, |
| "loss": 0.2883, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.37911515862283546, |
| "grad_norm": 0.6791506792289325, |
| "learning_rate": 3.5687124757385084e-06, |
| "loss": 0.2885, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.3829446046695308, |
| "grad_norm": 0.7086896739862765, |
| "learning_rate": 3.5405891654088154e-06, |
| "loss": 0.2815, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.38677405071622606, |
| "grad_norm": 0.6280354477468107, |
| "learning_rate": 3.5123056611436224e-06, |
| "loss": 0.2807, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.3906034967629214, |
| "grad_norm": 0.6538192887744636, |
| "learning_rate": 3.4838663170588573e-06, |
| "loss": 0.2723, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.3944329428096167, |
| "grad_norm": 0.6999366929435851, |
| "learning_rate": 3.455275511261272e-06, |
| "loss": 0.2804, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.398262388856312, |
| "grad_norm": 0.6406720215436563, |
| "learning_rate": 3.4265376451744564e-06, |
| "loss": 0.2776, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.4020918349030073, |
| "grad_norm": 0.6809060571142534, |
| "learning_rate": 3.3976571428612583e-06, |
| "loss": 0.2823, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.40592128094970265, |
| "grad_norm": 0.6506142391402524, |
| "learning_rate": 3.3686384503427177e-06, |
| "loss": 0.2785, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.4097507269963979, |
| "grad_norm": 0.6623408933951855, |
| "learning_rate": 3.339486034913627e-06, |
| "loss": 0.2781, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.41358017304309325, |
| "grad_norm": 0.675859964827601, |
| "learning_rate": 3.310204384454805e-06, |
| "loss": 0.2776, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.4174096190897885, |
| "grad_norm": 0.7354171342985883, |
| "learning_rate": 3.280798006742213e-06, |
| "loss": 0.2929, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.42123906513648385, |
| "grad_norm": 0.6457062459707484, |
| "learning_rate": 3.2512714287530007e-06, |
| "loss": 0.2743, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4250685111831792, |
| "grad_norm": 0.641329871481826, |
| "learning_rate": 3.2216291959686007e-06, |
| "loss": 0.2737, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.42889795722987445, |
| "grad_norm": 0.6566421664911357, |
| "learning_rate": 3.191875871674971e-06, |
| "loss": 0.2838, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.4327274032765698, |
| "grad_norm": 0.6027701960697498, |
| "learning_rate": 3.162016036260098e-06, |
| "loss": 0.2752, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.4365568493232651, |
| "grad_norm": 0.648907215118306, |
| "learning_rate": 3.1320542865088695e-06, |
| "loss": 0.2667, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.4403862953699604, |
| "grad_norm": 0.6077801768682932, |
| "learning_rate": 3.1019952348954163e-06, |
| "loss": 0.2747, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4442157414166557, |
| "grad_norm": 0.6576708995713357, |
| "learning_rate": 3.071843508873046e-06, |
| "loss": 0.2836, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.448045187463351, |
| "grad_norm": 0.695646974082748, |
| "learning_rate": 3.0416037501618676e-06, |
| "loss": 0.2732, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.4518746335100463, |
| "grad_norm": 0.616879255204133, |
| "learning_rate": 3.0112806140342176e-06, |
| "loss": 0.2759, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.45570407955674164, |
| "grad_norm": 0.6008817125153927, |
| "learning_rate": 2.9808787685980054e-06, |
| "loss": 0.2769, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.4595335256034369, |
| "grad_norm": 0.5972779982979275, |
| "learning_rate": 2.9504028940780777e-06, |
| "loss": 0.2836, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.46336297165013224, |
| "grad_norm": 0.6541064777899037, |
| "learning_rate": 2.9198576820957188e-06, |
| "loss": 0.2678, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.46719241769682757, |
| "grad_norm": 0.7461980100750918, |
| "learning_rate": 2.8892478349463987e-06, |
| "loss": 0.279, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.47102186374352284, |
| "grad_norm": 0.697749095404112, |
| "learning_rate": 2.8585780648758745e-06, |
| "loss": 0.2774, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.47485130979021817, |
| "grad_norm": 0.6901917627087478, |
| "learning_rate": 2.827853093354763e-06, |
| "loss": 0.2731, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.47868075583691344, |
| "grad_norm": 0.6167921413072504, |
| "learning_rate": 2.79707765035169e-06, |
| "loss": 0.2781, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.48251020188360877, |
| "grad_norm": 0.6877679962661163, |
| "learning_rate": 2.7662564736051378e-06, |
| "loss": 0.2779, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4863396479303041, |
| "grad_norm": 0.6529732798686552, |
| "learning_rate": 2.7353943078940876e-06, |
| "loss": 0.2755, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.49016909397699937, |
| "grad_norm": 0.573925656257561, |
| "learning_rate": 2.7044959043075815e-06, |
| "loss": 0.2781, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4939985400236947, |
| "grad_norm": 0.7060010201146872, |
| "learning_rate": 2.67356601951332e-06, |
| "loss": 0.2885, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.49782798607039, |
| "grad_norm": 0.6706570115076154, |
| "learning_rate": 2.64260941502539e-06, |
| "loss": 0.2823, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5016574321170854, |
| "grad_norm": 0.6816750561081921, |
| "learning_rate": 2.611630856471252e-06, |
| "loss": 0.2734, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5054868781637806, |
| "grad_norm": 0.6503962781113031, |
| "learning_rate": 2.5806351128580963e-06, |
| "loss": 0.2775, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.5093163242104759, |
| "grad_norm": 0.6770280496930409, |
| "learning_rate": 2.549626955838673e-06, |
| "loss": 0.2805, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.5131457702571712, |
| "grad_norm": 0.6025144898257462, |
| "learning_rate": 2.5186111589767187e-06, |
| "loss": 0.2715, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.5169752163038666, |
| "grad_norm": 0.6707574673231309, |
| "learning_rate": 2.487592497012089e-06, |
| "loss": 0.2763, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.5208046623505619, |
| "grad_norm": 0.6063466305966893, |
| "learning_rate": 2.456575745125713e-06, |
| "loss": 0.2845, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.5246341083972571, |
| "grad_norm": 0.6060316435488056, |
| "learning_rate": 2.4255656782044644e-06, |
| "loss": 0.2772, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.5284635544439524, |
| "grad_norm": 0.6023582378786108, |
| "learning_rate": 2.3945670701061033e-06, |
| "loss": 0.267, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.5322930004906478, |
| "grad_norm": 0.672852399998615, |
| "learning_rate": 2.3635846929243536e-06, |
| "loss": 0.2757, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5361224465373431, |
| "grad_norm": 0.671673917828738, |
| "learning_rate": 2.3326233162542655e-06, |
| "loss": 0.2772, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5399518925840384, |
| "grad_norm": 0.6908191182674716, |
| "learning_rate": 2.3016877064579564e-06, |
| "loss": 0.2752, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.5437813386307337, |
| "grad_norm": 0.6483112345732687, |
| "learning_rate": 2.2707826259308493e-06, |
| "loss": 0.2773, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.547610784677429, |
| "grad_norm": 0.6527686650275873, |
| "learning_rate": 2.2399128323685287e-06, |
| "loss": 0.2711, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.5514402307241243, |
| "grad_norm": 0.6402708780992856, |
| "learning_rate": 2.2090830780343116e-06, |
| "loss": 0.2774, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5552696767708196, |
| "grad_norm": 0.7121133583105477, |
| "learning_rate": 2.178298109027659e-06, |
| "loss": 0.2789, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.559099122817515, |
| "grad_norm": 0.6936411537357103, |
| "learning_rate": 2.147562664553537e-06, |
| "loss": 0.2744, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5629285688642103, |
| "grad_norm": 0.6126488504445933, |
| "learning_rate": 2.116881476192834e-06, |
| "loss": 0.2698, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.5667580149109055, |
| "grad_norm": 0.6045796996159061, |
| "learning_rate": 2.086259267173961e-06, |
| "loss": 0.2756, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5705874609576008, |
| "grad_norm": 0.6331045988234285, |
| "learning_rate": 2.0557007516457287e-06, |
| "loss": 0.2813, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.5744169070042962, |
| "grad_norm": 0.6250063298087053, |
| "learning_rate": 2.025210633951627e-06, |
| "loss": 0.2659, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5782463530509915, |
| "grad_norm": 0.6244702166810576, |
| "learning_rate": 1.9947936079056118e-06, |
| "loss": 0.2691, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5820757990976868, |
| "grad_norm": 0.6645304037867747, |
| "learning_rate": 1.964454356069514e-06, |
| "loss": 0.2653, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5859052451443821, |
| "grad_norm": 0.6422648719414517, |
| "learning_rate": 1.934197549032183e-06, |
| "loss": 0.2753, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5897346911910774, |
| "grad_norm": 0.6796801504650317, |
| "learning_rate": 1.904027844690468e-06, |
| "loss": 0.2756, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5935641372377727, |
| "grad_norm": 0.612092392514174, |
| "learning_rate": 1.8739498875321563e-06, |
| "loss": 0.2781, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.597393583284468, |
| "grad_norm": 0.6072776324810985, |
| "learning_rate": 1.8439683079209789e-06, |
| "loss": 0.2762, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.6012230293311633, |
| "grad_norm": 0.6756861517531914, |
| "learning_rate": 1.8140877213837823e-06, |
| "loss": 0.2671, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.6050524753778587, |
| "grad_norm": 0.6297494292950692, |
| "learning_rate": 1.7843127278999944e-06, |
| "loss": 0.2656, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.6088819214245539, |
| "grad_norm": 0.6384980188983074, |
| "learning_rate": 1.7546479111934733e-06, |
| "loss": 0.2742, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.6127113674712492, |
| "grad_norm": 0.6354445299662702, |
| "learning_rate": 1.7250978380268696e-06, |
| "loss": 0.2703, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6165408135179445, |
| "grad_norm": 0.6499360638842633, |
| "learning_rate": 1.6956670574985909e-06, |
| "loss": 0.2778, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.6203702595646399, |
| "grad_norm": 0.6611350047172592, |
| "learning_rate": 1.6663601003424884e-06, |
| "loss": 0.2751, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.6241997056113352, |
| "grad_norm": 0.6676095929381155, |
| "learning_rate": 1.6371814782303723e-06, |
| "loss": 0.2697, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.6280291516580304, |
| "grad_norm": 0.6627616428191541, |
| "learning_rate": 1.6081356830774625e-06, |
| "loss": 0.2728, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.6318585977047257, |
| "grad_norm": 0.6297343768461555, |
| "learning_rate": 1.5792271863508751e-06, |
| "loss": 0.2725, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.6356880437514211, |
| "grad_norm": 0.6164109978910287, |
| "learning_rate": 1.5504604383812646e-06, |
| "loss": 0.2665, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.6395174897981164, |
| "grad_norm": 0.6163778395985405, |
| "learning_rate": 1.5218398676777103e-06, |
| "loss": 0.2676, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.6433469358448117, |
| "grad_norm": 0.7111914438547673, |
| "learning_rate": 1.493369880245973e-06, |
| "loss": 0.2682, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6471763818915071, |
| "grad_norm": 0.5951988021270335, |
| "learning_rate": 1.4650548589102092e-06, |
| "loss": 0.2725, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.6510058279382023, |
| "grad_norm": 0.615884409391351, |
| "learning_rate": 1.436899162638255e-06, |
| "loss": 0.2693, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6548352739848976, |
| "grad_norm": 0.5615244588845161, |
| "learning_rate": 1.4089071258705782e-06, |
| "loss": 0.2717, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.6586647200315929, |
| "grad_norm": 0.613522242640938, |
| "learning_rate": 1.3810830578530226e-06, |
| "loss": 0.2645, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6624941660782883, |
| "grad_norm": 0.6255188764708021, |
| "learning_rate": 1.3534312419734066e-06, |
| "loss": 0.2619, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.6663236121249836, |
| "grad_norm": 0.6219089714484455, |
| "learning_rate": 1.3259559351021249e-06, |
| "loss": 0.2706, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.6701530581716788, |
| "grad_norm": 0.7068243904113033, |
| "learning_rate": 1.2986613669368159e-06, |
| "loss": 0.2724, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.6739825042183741, |
| "grad_norm": 0.6239904301630281, |
| "learning_rate": 1.2715517393512239e-06, |
| "loss": 0.2699, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6778119502650695, |
| "grad_norm": 0.636573560785032, |
| "learning_rate": 1.2446312257483358e-06, |
| "loss": 0.2606, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.6816413963117648, |
| "grad_norm": 0.5822319215351535, |
| "learning_rate": 1.2179039704179119e-06, |
| "loss": 0.2671, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.6854708423584601, |
| "grad_norm": 0.6414100583030806, |
| "learning_rate": 1.1913740878984818e-06, |
| "loss": 0.2728, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.6893002884051553, |
| "grad_norm": 0.6019132802768392, |
| "learning_rate": 1.1650456623439368e-06, |
| "loss": 0.2684, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6931297344518507, |
| "grad_norm": 0.6936171991583808, |
| "learning_rate": 1.1389227468947905e-06, |
| "loss": 0.271, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.696959180498546, |
| "grad_norm": 0.621045757650744, |
| "learning_rate": 1.11300936305422e-06, |
| "loss": 0.2657, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.7007886265452413, |
| "grad_norm": 0.6554333537644303, |
| "learning_rate": 1.0873095000689676e-06, |
| "loss": 0.2666, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.7046180725919367, |
| "grad_norm": 0.6286228416556564, |
| "learning_rate": 1.0618271143152185e-06, |
| "loss": 0.2714, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.708447518638632, |
| "grad_norm": 0.6221920366356362, |
| "learning_rate": 1.0365661286895364e-06, |
| "loss": 0.2672, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.7122769646853272, |
| "grad_norm": 0.6038564143991549, |
| "learning_rate": 1.011530432004948e-06, |
| "loss": 0.2639, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.7161064107320225, |
| "grad_norm": 0.6754360798564671, |
| "learning_rate": 9.86723878392279e-07, |
| "loss": 0.2675, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.7199358567787179, |
| "grad_norm": 0.5875424854805807, |
| "learning_rate": 9.621502867068286e-07, |
| "loss": 0.2592, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.7237653028254132, |
| "grad_norm": 0.6032756771398841, |
| "learning_rate": 9.378134399404768e-07, |
| "loss": 0.2676, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.7275947488721085, |
| "grad_norm": 0.597275111662435, |
| "learning_rate": 9.137170846393054e-07, |
| "loss": 0.268, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7314241949188037, |
| "grad_norm": 0.610745344226226, |
| "learning_rate": 8.898649303268373e-07, |
| "loss": 0.2752, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.7352536409654991, |
| "grad_norm": 0.7020431210410311, |
| "learning_rate": 8.662606489329712e-07, |
| "loss": 0.2793, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.7390830870121944, |
| "grad_norm": 0.6300673226887155, |
| "learning_rate": 8.429078742287072e-07, |
| "loss": 0.2673, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.7429125330588897, |
| "grad_norm": 0.6440907203937188, |
| "learning_rate": 8.198102012667409e-07, |
| "loss": 0.2662, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.746741979105585, |
| "grad_norm": 0.6258159414377766, |
| "learning_rate": 7.969711858280251e-07, |
| "loss": 0.2712, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7505714251522804, |
| "grad_norm": 0.5989574886855157, |
| "learning_rate": 7.743943438743676e-07, |
| "loss": 0.2634, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7544008711989756, |
| "grad_norm": 0.7834580590666472, |
| "learning_rate": 7.520831510071744e-07, |
| "loss": 0.2632, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.7582303172456709, |
| "grad_norm": 0.6728771166977499, |
| "learning_rate": 7.30041041932387e-07, |
| "loss": 0.2756, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.7620597632923662, |
| "grad_norm": 0.5993928197071544, |
| "learning_rate": 7.082714099317334e-07, |
| "loss": 0.2664, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.7658892093390616, |
| "grad_norm": 0.5848853677834179, |
| "learning_rate": 6.867776063403411e-07, |
| "loss": 0.2628, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7697186553857569, |
| "grad_norm": 0.629330394414076, |
| "learning_rate": 6.655629400308191e-07, |
| "loss": 0.2658, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.7735481014324521, |
| "grad_norm": 0.5924525230054234, |
| "learning_rate": 6.44630676903869e-07, |
| "loss": 0.2669, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.7773775474791474, |
| "grad_norm": 0.6760063055600575, |
| "learning_rate": 6.239840393855185e-07, |
| "loss": 0.2692, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.7812069935258428, |
| "grad_norm": 0.6602651610212074, |
| "learning_rate": 6.036262059310383e-07, |
| "loss": 0.2629, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.7850364395725381, |
| "grad_norm": 0.6327130212823728, |
| "learning_rate": 5.835603105356396e-07, |
| "loss": 0.2678, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.7888658856192334, |
| "grad_norm": 0.68617187633267, |
| "learning_rate": 5.637894422520027e-07, |
| "loss": 0.268, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.7926953316659286, |
| "grad_norm": 0.6854582170736043, |
| "learning_rate": 5.443166447147392e-07, |
| "loss": 0.2652, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.796524777712624, |
| "grad_norm": 0.6454661983770266, |
| "learning_rate": 5.251449156718313e-07, |
| "loss": 0.2616, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.8003542237593193, |
| "grad_norm": 0.6204018065147047, |
| "learning_rate": 5.062772065231492e-07, |
| "loss": 0.2664, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.8041836698060146, |
| "grad_norm": 0.6500266139560574, |
| "learning_rate": 4.877164218660901e-07, |
| "loss": 0.2656, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.80801311585271, |
| "grad_norm": 0.6419744527723766, |
| "learning_rate": 4.694654190484327e-07, |
| "loss": 0.2612, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.8118425618994053, |
| "grad_norm": 0.6238307107139875, |
| "learning_rate": 4.5152700772845947e-07, |
| "loss": 0.2676, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.8156720079461005, |
| "grad_norm": 0.6514460031994411, |
| "learning_rate": 4.339039494424263e-07, |
| "loss": 0.2755, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.8195014539927958, |
| "grad_norm": 0.5974777353730542, |
| "learning_rate": 4.16598957179431e-07, |
| "loss": 0.2597, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.8233309000394912, |
| "grad_norm": 0.6009135110985171, |
| "learning_rate": 3.9961469496376584e-07, |
| "loss": 0.2592, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.8271603460861865, |
| "grad_norm": 0.6454195389454723, |
| "learning_rate": 3.829537774448e-07, |
| "loss": 0.2714, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.8309897921328818, |
| "grad_norm": 0.5913764986372555, |
| "learning_rate": 3.6661876949447006e-07, |
| "loss": 0.2637, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.834819238179577, |
| "grad_norm": 0.6714610867965414, |
| "learning_rate": 3.506121858124253e-07, |
| "loss": 0.2652, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.8386486842262724, |
| "grad_norm": 0.6260094170090917, |
| "learning_rate": 3.3493649053890325e-07, |
| "loss": 0.2642, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.8424781302729677, |
| "grad_norm": 0.5797985857131752, |
| "learning_rate": 3.1959409687538854e-07, |
| "loss": 0.2632, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.846307576319663, |
| "grad_norm": 0.6189077791942977, |
| "learning_rate": 3.04587366713108e-07, |
| "loss": 0.2648, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.8501370223663584, |
| "grad_norm": 0.6122894183484023, |
| "learning_rate": 2.8991861026943015e-07, |
| "loss": 0.2741, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.8539664684130536, |
| "grad_norm": 0.6661899719747609, |
| "learning_rate": 2.755900857322172e-07, |
| "loss": 0.2645, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.8577959144597489, |
| "grad_norm": 0.5568358628059588, |
| "learning_rate": 2.616039989121899e-07, |
| "loss": 0.2546, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.8616253605064442, |
| "grad_norm": 0.6975565699445695, |
| "learning_rate": 2.479625029033489e-07, |
| "loss": 0.2774, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.8654548065531396, |
| "grad_norm": 0.6137153739809891, |
| "learning_rate": 2.3466769775151887e-07, |
| "loss": 0.266, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.8692842525998349, |
| "grad_norm": 0.6065270965865103, |
| "learning_rate": 2.21721630131054e-07, |
| "loss": 0.2717, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.8731136986465302, |
| "grad_norm": 0.6194228612620867, |
| "learning_rate": 2.0912629302976494e-07, |
| "loss": 0.2656, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.8769431446932254, |
| "grad_norm": 0.6721041148274991, |
| "learning_rate": 1.968836254421036e-07, |
| "loss": 0.2653, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.8807725907399208, |
| "grad_norm": 0.664610505857682, |
| "learning_rate": 1.849955120706673e-07, |
| "loss": 0.2677, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.8846020367866161, |
| "grad_norm": 0.6321996875856193, |
| "learning_rate": 1.734637830360536e-07, |
| "loss": 0.2645, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.8884314828333114, |
| "grad_norm": 0.6365093137379149, |
| "learning_rate": 1.6229021359512626e-07, |
| "loss": 0.2658, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.8922609288800067, |
| "grad_norm": 0.6539770975459238, |
| "learning_rate": 1.514765238677185e-07, |
| "loss": 0.259, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.896090374926702, |
| "grad_norm": 0.5990361778489132, |
| "learning_rate": 1.4102437857183155e-07, |
| "loss": 0.265, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.8999198209733973, |
| "grad_norm": 0.6484099138497742, |
| "learning_rate": 1.30935386767356e-07, |
| "loss": 0.2667, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.9037492670200926, |
| "grad_norm": 0.5867117908179056, |
| "learning_rate": 1.2121110160836697e-07, |
| "loss": 0.2634, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.907578713066788, |
| "grad_norm": 0.5796597988618597, |
| "learning_rate": 1.1185302010402105e-07, |
| "loss": 0.2719, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.9114081591134833, |
| "grad_norm": 0.6237999983224305, |
| "learning_rate": 1.0286258288810108e-07, |
| "loss": 0.2627, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.9152376051601786, |
| "grad_norm": 0.603524669140342, |
| "learning_rate": 9.424117399723432e-08, |
| "loss": 0.262, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.9190670512068738, |
| "grad_norm": 0.620014776939978, |
| "learning_rate": 8.599012065782924e-08, |
| "loss": 0.271, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.9228964972535691, |
| "grad_norm": 0.6276616849770017, |
| "learning_rate": 7.811069308175156e-08, |
| "loss": 0.2692, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.9267259433002645, |
| "grad_norm": 0.6160811115395115, |
| "learning_rate": 7.060410427078473e-08, |
| "loss": 0.2674, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.9305553893469598, |
| "grad_norm": 0.5848887503471439, |
| "learning_rate": 6.347150982989159e-08, |
| "loss": 0.2625, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.9343848353936551, |
| "grad_norm": 0.5883046146508241, |
| "learning_rate": 5.6714007789314686e-08, |
| "loss": 0.2621, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.9382142814403504, |
| "grad_norm": 0.6940261485803617, |
| "learning_rate": 5.033263843554015e-08, |
| "loss": 0.2646, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.9420437274870457, |
| "grad_norm": 0.6682532368250712, |
| "learning_rate": 4.4328384151149094e-08, |
| "loss": 0.2667, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.945873173533741, |
| "grad_norm": 0.6267836020326658, |
| "learning_rate": 3.870216926358555e-08, |
| "loss": 0.2643, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.9497026195804363, |
| "grad_norm": 0.5575161118815479, |
| "learning_rate": 3.3454859902860295e-08, |
| "loss": 0.2641, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.9535320656271317, |
| "grad_norm": 0.5714423960482532, |
| "learning_rate": 2.858726386821359e-08, |
| "loss": 0.2707, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.9573615116738269, |
| "grad_norm": 0.6111083062299648, |
| "learning_rate": 2.410013050375859e-08, |
| "loss": 0.2709, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9611909577205222, |
| "grad_norm": 0.642938384488523, |
| "learning_rate": 1.999415058312276e-08, |
| "loss": 0.271, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.9650204037672175, |
| "grad_norm": 0.6112837503032897, |
| "learning_rate": 1.6269956203107117e-08, |
| "loss": 0.2512, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.9688498498139129, |
| "grad_norm": 0.5889833725999716, |
| "learning_rate": 1.2928120686377388e-08, |
| "loss": 0.2661, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.9726792958606082, |
| "grad_norm": 0.5773801445867635, |
| "learning_rate": 9.969158493204067e-09, |
| "loss": 0.2653, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.9765087419073035, |
| "grad_norm": 0.6062759480472552, |
| "learning_rate": 7.393525142262992e-09, |
| "loss": 0.2691, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.9803381879539987, |
| "grad_norm": 0.5770795227743729, |
| "learning_rate": 5.201617140510318e-09, |
| "loss": 0.2694, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.9841676340006941, |
| "grad_norm": 0.6054174692665726, |
| "learning_rate": 3.3937719221427413e-09, |
| "loss": 0.2592, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.9879970800473894, |
| "grad_norm": 0.5960623948954498, |
| "learning_rate": 1.9702677966507157e-09, |
| "loss": 0.2641, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.9918265260940847, |
| "grad_norm": 0.6702560926553222, |
| "learning_rate": 9.31323905974113e-10, |
| "loss": 0.2621, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.99565597214078, |
| "grad_norm": 0.623171089038422, |
| "learning_rate": 2.7710019076532257e-10, |
| "loss": 0.2672, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.9994854181874753, |
| "grad_norm": 0.6269492106617217, |
| "learning_rate": 7.697365768943865e-12, |
| "loss": 0.2736, |
| "step": 2610 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2611, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 600, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 624569162006528.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|