{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9991487623003845,
  "eval_steps": 500,
  "global_step": 2751,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010895842555075079,
      "grad_norm": 2.5097851753234863,
      "learning_rate": 1.9927299163940386e-05,
      "loss": 0.51,
      "step": 10
    },
    {
      "epoch": 0.021791685110150158,
      "grad_norm": 1.4778149127960205,
      "learning_rate": 1.985459832788077e-05,
      "loss": 0.0818,
      "step": 20
    },
    {
      "epoch": 0.03268752766522524,
      "grad_norm": 1.0869137048721313,
      "learning_rate": 1.978189749182116e-05,
      "loss": 0.1052,
      "step": 30
    },
    {
      "epoch": 0.043583370220300316,
      "grad_norm": 1.0245683193206787,
      "learning_rate": 1.970919665576154e-05,
      "loss": 0.0508,
      "step": 40
    },
    {
      "epoch": 0.054479212775375395,
      "grad_norm": 3.580275535583496,
      "learning_rate": 1.963649581970193e-05,
      "loss": 0.0336,
      "step": 50
    },
    {
      "epoch": 0.06537505533045047,
      "grad_norm": 13.708664894104004,
      "learning_rate": 1.9563794983642313e-05,
      "loss": 0.0247,
      "step": 60
    },
    {
      "epoch": 0.07627089788552556,
      "grad_norm": 2.38472843170166,
      "learning_rate": 1.9491094147582698e-05,
      "loss": 0.0474,
      "step": 70
    },
    {
      "epoch": 0.08716674044060063,
      "grad_norm": 2.3008601665496826,
      "learning_rate": 1.9418393311523086e-05,
      "loss": 0.0505,
      "step": 80
    },
    {
      "epoch": 0.09806258299567572,
      "grad_norm": 0.9727557301521301,
      "learning_rate": 1.9345692475463468e-05,
      "loss": 0.0291,
      "step": 90
    },
    {
      "epoch": 0.10895842555075079,
      "grad_norm": 0.017909426242113113,
      "learning_rate": 1.9272991639403856e-05,
      "loss": 0.0232,
      "step": 100
    },
    {
      "epoch": 0.11985426810582588,
      "grad_norm": 0.13610009849071503,
      "learning_rate": 1.920029080334424e-05,
      "loss": 0.0679,
      "step": 110
    },
    {
      "epoch": 0.13075011066090095,
      "grad_norm": 0.09146017581224442,
      "learning_rate": 1.9127589967284625e-05,
      "loss": 0.062,
      "step": 120
    },
    {
      "epoch": 0.14164595321597603,
      "grad_norm": 3.696361541748047,
      "learning_rate": 1.9054889131225013e-05,
      "loss": 0.0515,
      "step": 130
    },
    {
      "epoch": 0.15254179577105112,
      "grad_norm": 0.07528296858072281,
      "learning_rate": 1.8982188295165395e-05,
      "loss": 0.008,
      "step": 140
    },
    {
      "epoch": 0.16343763832612618,
      "grad_norm": 0.3899368345737457,
      "learning_rate": 1.8909487459105783e-05,
      "loss": 0.0309,
      "step": 150
    },
    {
      "epoch": 0.17433348088120126,
      "grad_norm": 0.5960955619812012,
      "learning_rate": 1.8836786623046168e-05,
      "loss": 0.0415,
      "step": 160
    },
    {
      "epoch": 0.18522932343627635,
      "grad_norm": 0.027237065136432648,
      "learning_rate": 1.8764085786986552e-05,
      "loss": 0.0257,
      "step": 170
    },
    {
      "epoch": 0.19612516599135144,
      "grad_norm": 6.851381778717041,
      "learning_rate": 1.8691384950926937e-05,
      "loss": 0.0585,
      "step": 180
    },
    {
      "epoch": 0.2070210085464265,
      "grad_norm": 1.518951416015625,
      "learning_rate": 1.8618684114867322e-05,
      "loss": 0.0173,
      "step": 190
    },
    {
      "epoch": 0.21791685110150158,
      "grad_norm": 16.334980010986328,
      "learning_rate": 1.854598327880771e-05,
      "loss": 0.0892,
      "step": 200
    },
    {
      "epoch": 0.22881269365657667,
      "grad_norm": 0.6327227354049683,
      "learning_rate": 1.847328244274809e-05,
      "loss": 0.0391,
      "step": 210
    },
    {
      "epoch": 0.23970853621165175,
      "grad_norm": 0.026528311893343925,
      "learning_rate": 1.840058160668848e-05,
      "loss": 0.0316,
      "step": 220
    },
    {
      "epoch": 0.2506043787667268,
      "grad_norm": 0.15849582850933075,
      "learning_rate": 1.8327880770628864e-05,
      "loss": 0.0306,
      "step": 230
    },
    {
      "epoch": 0.2615002213218019,
      "grad_norm": 8.983612060546875,
      "learning_rate": 1.825517993456925e-05,
      "loss": 0.0252,
      "step": 240
    },
    {
      "epoch": 0.272396063876877,
      "grad_norm": 1.4300966262817383,
      "learning_rate": 1.8182479098509634e-05,
      "loss": 0.0307,
      "step": 250
    },
    {
      "epoch": 0.28329190643195207,
      "grad_norm": 0.19248631596565247,
      "learning_rate": 1.810977826245002e-05,
      "loss": 0.034,
      "step": 260
    },
    {
      "epoch": 0.29418774898702715,
      "grad_norm": 0.0807420164346695,
      "learning_rate": 1.8037077426390407e-05,
      "loss": 0.0218,
      "step": 270
    },
    {
      "epoch": 0.30508359154210224,
      "grad_norm": 0.04030030593276024,
      "learning_rate": 1.796437659033079e-05,
      "loss": 0.0164,
      "step": 280
    },
    {
      "epoch": 0.31597943409717727,
      "grad_norm": 0.03919893503189087,
      "learning_rate": 1.7891675754271176e-05,
      "loss": 0.0207,
      "step": 290
    },
    {
      "epoch": 0.32687527665225236,
      "grad_norm": 0.9118878245353699,
      "learning_rate": 1.781897491821156e-05,
      "loss": 0.0254,
      "step": 300
    },
    {
      "epoch": 0.33777111920732744,
      "grad_norm": 0.09405702352523804,
      "learning_rate": 1.7746274082151945e-05,
      "loss": 0.0072,
      "step": 310
    },
    {
      "epoch": 0.3486669617624025,
      "grad_norm": 1.061004638671875,
      "learning_rate": 1.7673573246092334e-05,
      "loss": 0.0178,
      "step": 320
    },
    {
      "epoch": 0.3595628043174776,
      "grad_norm": 0.35136711597442627,
      "learning_rate": 1.7600872410032715e-05,
      "loss": 0.0268,
      "step": 330
    },
    {
      "epoch": 0.3704586468725527,
      "grad_norm": 0.33769288659095764,
      "learning_rate": 1.7528171573973103e-05,
      "loss": 0.0383,
      "step": 340
    },
    {
      "epoch": 0.3813544894276278,
      "grad_norm": 1.448626160621643,
      "learning_rate": 1.7455470737913488e-05,
      "loss": 0.0214,
      "step": 350
    },
    {
      "epoch": 0.39225033198270287,
      "grad_norm": 1.096685767173767,
      "learning_rate": 1.7382769901853873e-05,
      "loss": 0.0442,
      "step": 360
    },
    {
      "epoch": 0.4031461745377779,
      "grad_norm": 0.08582064509391785,
      "learning_rate": 1.7310069065794257e-05,
      "loss": 0.041,
      "step": 370
    },
    {
      "epoch": 0.414042017092853,
      "grad_norm": 0.5726041793823242,
      "learning_rate": 1.7237368229734642e-05,
      "loss": 0.02,
      "step": 380
    },
    {
      "epoch": 0.4249378596479281,
      "grad_norm": 0.27912572026252747,
      "learning_rate": 1.716466739367503e-05,
      "loss": 0.033,
      "step": 390
    },
    {
      "epoch": 0.43583370220300316,
      "grad_norm": 0.40194639563560486,
      "learning_rate": 1.7091966557615415e-05,
      "loss": 0.0297,
      "step": 400
    },
    {
      "epoch": 0.44672954475807825,
      "grad_norm": 0.4923015832901001,
      "learning_rate": 1.70192657215558e-05,
      "loss": 0.0473,
      "step": 410
    },
    {
      "epoch": 0.45762538731315333,
      "grad_norm": 0.4864579439163208,
      "learning_rate": 1.6946564885496184e-05,
      "loss": 0.0335,
      "step": 420
    },
    {
      "epoch": 0.4685212298682284,
      "grad_norm": 0.0577218122780323,
      "learning_rate": 1.687386404943657e-05,
      "loss": 0.0267,
      "step": 430
    },
    {
      "epoch": 0.4794170724233035,
      "grad_norm": 0.026588434353470802,
      "learning_rate": 1.6801163213376954e-05,
      "loss": 0.0242,
      "step": 440
    },
    {
      "epoch": 0.4903129149783786,
      "grad_norm": 1.106031060218811,
      "learning_rate": 1.6728462377317342e-05,
      "loss": 0.0412,
      "step": 450
    },
    {
      "epoch": 0.5012087575334536,
      "grad_norm": 2.185438394546509,
      "learning_rate": 1.6655761541257727e-05,
      "loss": 0.0168,
      "step": 460
    },
    {
      "epoch": 0.5121046000885288,
      "grad_norm": 0.2645202577114105,
      "learning_rate": 1.658306070519811e-05,
      "loss": 0.0225,
      "step": 470
    },
    {
      "epoch": 0.5230004426436038,
      "grad_norm": 0.26281026005744934,
      "learning_rate": 1.6510359869138496e-05,
      "loss": 0.0225,
      "step": 480
    },
    {
      "epoch": 0.5338962851986789,
      "grad_norm": 0.09611400961875916,
      "learning_rate": 1.643765903307888e-05,
      "loss": 0.0204,
      "step": 490
    },
    {
      "epoch": 0.544792127753754,
      "grad_norm": 0.2964985966682434,
      "learning_rate": 1.6364958197019266e-05,
      "loss": 0.0192,
      "step": 500
    },
    {
      "epoch": 0.555687970308829,
      "grad_norm": 3.2991862297058105,
      "learning_rate": 1.629225736095965e-05,
      "loss": 0.0395,
      "step": 510
    },
    {
      "epoch": 0.5665838128639041,
      "grad_norm": 0.9299785494804382,
      "learning_rate": 1.621955652490004e-05,
      "loss": 0.0213,
      "step": 520
    },
    {
      "epoch": 0.5774796554189792,
      "grad_norm": 1.7656854391098022,
      "learning_rate": 1.6146855688840423e-05,
      "loss": 0.0293,
      "step": 530
    },
    {
      "epoch": 0.5883754979740543,
      "grad_norm": 0.052940454334020615,
      "learning_rate": 1.6074154852780808e-05,
      "loss": 0.0349,
      "step": 540
    },
    {
      "epoch": 0.5992713405291293,
      "grad_norm": 0.6700181365013123,
      "learning_rate": 1.6001454016721193e-05,
      "loss": 0.0098,
      "step": 550
    },
    {
      "epoch": 0.6101671830842045,
      "grad_norm": 1.4992352724075317,
      "learning_rate": 1.5928753180661577e-05,
      "loss": 0.0209,
      "step": 560
    },
    {
      "epoch": 0.6210630256392795,
      "grad_norm": 0.6882705688476562,
      "learning_rate": 1.5856052344601966e-05,
      "loss": 0.0208,
      "step": 570
    },
    {
      "epoch": 0.6319588681943545,
      "grad_norm": 0.35566991567611694,
      "learning_rate": 1.578335150854235e-05,
      "loss": 0.0157,
      "step": 580
    },
    {
      "epoch": 0.6428547107494297,
      "grad_norm": 0.1365765929222107,
      "learning_rate": 1.5710650672482735e-05,
      "loss": 0.0207,
      "step": 590
    },
    {
      "epoch": 0.6537505533045047,
      "grad_norm": 0.010805984027683735,
      "learning_rate": 1.563794983642312e-05,
      "loss": 0.0386,
      "step": 600
    },
    {
      "epoch": 0.6646463958595799,
      "grad_norm": 0.33677366375923157,
      "learning_rate": 1.5565249000363505e-05,
      "loss": 0.0178,
      "step": 610
    },
    {
      "epoch": 0.6755422384146549,
      "grad_norm": 0.023768046870827675,
      "learning_rate": 1.5492548164303893e-05,
      "loss": 0.0115,
      "step": 620
    },
    {
      "epoch": 0.68643808096973,
      "grad_norm": 1.271041989326477,
      "learning_rate": 1.5419847328244274e-05,
      "loss": 0.0335,
      "step": 630
    },
    {
      "epoch": 0.697333923524805,
      "grad_norm": 0.39303043484687805,
      "learning_rate": 1.5347146492184662e-05,
      "loss": 0.0456,
      "step": 640
    },
    {
      "epoch": 0.7082297660798802,
      "grad_norm": 1.5450124740600586,
      "learning_rate": 1.5274445656125047e-05,
      "loss": 0.0206,
      "step": 650
    },
    {
      "epoch": 0.7191256086349552,
      "grad_norm": 0.12599903345108032,
      "learning_rate": 1.5201744820065432e-05,
      "loss": 0.0125,
      "step": 660
    },
    {
      "epoch": 0.7300214511900303,
      "grad_norm": 0.03158240765333176,
      "learning_rate": 1.5129043984005818e-05,
      "loss": 0.0019,
      "step": 670
    },
    {
      "epoch": 0.7409172937451054,
      "grad_norm": 1.2820944786071777,
      "learning_rate": 1.5056343147946201e-05,
      "loss": 0.0132,
      "step": 680
    },
    {
      "epoch": 0.7518131363001804,
      "grad_norm": 0.4018807113170624,
      "learning_rate": 1.4983642311886588e-05,
      "loss": 0.0274,
      "step": 690
    },
    {
      "epoch": 0.7627089788552556,
      "grad_norm": 0.7147946953773499,
      "learning_rate": 1.4910941475826972e-05,
      "loss": 0.0207,
      "step": 700
    },
    {
      "epoch": 0.7736048214103306,
      "grad_norm": 1.3514039516448975,
      "learning_rate": 1.4838240639767359e-05,
      "loss": 0.0088,
      "step": 710
    },
    {
      "epoch": 0.7845006639654057,
      "grad_norm": 0.10958287864923477,
      "learning_rate": 1.4765539803707745e-05,
      "loss": 0.0054,
      "step": 720
    },
    {
      "epoch": 0.7953965065204808,
      "grad_norm": 0.12291970103979111,
      "learning_rate": 1.4692838967648128e-05,
      "loss": 0.0154,
      "step": 730
    },
    {
      "epoch": 0.8062923490755558,
      "grad_norm": 0.056142911314964294,
      "learning_rate": 1.4620138131588515e-05,
      "loss": 0.0214,
      "step": 740
    },
    {
      "epoch": 0.817188191630631,
      "grad_norm": 0.08367596566677094,
      "learning_rate": 1.45474372955289e-05,
      "loss": 0.0074,
      "step": 750
    },
    {
      "epoch": 0.828084034185706,
      "grad_norm": 0.8847033381462097,
      "learning_rate": 1.4474736459469286e-05,
      "loss": 0.052,
      "step": 760
    },
    {
      "epoch": 0.8389798767407811,
      "grad_norm": 0.23346182703971863,
      "learning_rate": 1.4402035623409672e-05,
      "loss": 0.0238,
      "step": 770
    },
    {
      "epoch": 0.8498757192958561,
      "grad_norm": 0.7445326447486877,
      "learning_rate": 1.4329334787350055e-05,
      "loss": 0.0179,
      "step": 780
    },
    {
      "epoch": 0.8607715618509313,
      "grad_norm": 1.623715877532959,
      "learning_rate": 1.4256633951290442e-05,
      "loss": 0.0138,
      "step": 790
    },
    {
      "epoch": 0.8716674044060063,
      "grad_norm": 0.12205464392900467,
      "learning_rate": 1.4183933115230826e-05,
      "loss": 0.0182,
      "step": 800
    },
    {
      "epoch": 0.8825632469610815,
      "grad_norm": 0.015034107491374016,
      "learning_rate": 1.4111232279171211e-05,
      "loss": 0.0192,
      "step": 810
    },
    {
      "epoch": 0.8934590895161565,
      "grad_norm": 1.1116948127746582,
      "learning_rate": 1.4038531443111596e-05,
      "loss": 0.0329,
      "step": 820
    },
    {
      "epoch": 0.9043549320712315,
      "grad_norm": 0.35468608140945435,
      "learning_rate": 1.3965830607051982e-05,
      "loss": 0.0299,
      "step": 830
    },
    {
      "epoch": 0.9152507746263067,
      "grad_norm": 1.3069281578063965,
      "learning_rate": 1.3893129770992369e-05,
      "loss": 0.028,
      "step": 840
    },
    {
      "epoch": 0.9261466171813817,
      "grad_norm": 0.6548961997032166,
      "learning_rate": 1.3820428934932752e-05,
      "loss": 0.0125,
      "step": 850
    },
    {
      "epoch": 0.9370424597364568,
      "grad_norm": 0.016538333147764206,
      "learning_rate": 1.3747728098873138e-05,
      "loss": 0.0097,
      "step": 860
    },
    {
      "epoch": 0.9479383022915319,
      "grad_norm": 0.7220777273178101,
      "learning_rate": 1.3675027262813523e-05,
      "loss": 0.0281,
      "step": 870
    },
    {
      "epoch": 0.958834144846607,
      "grad_norm": 7.228305339813232,
      "learning_rate": 1.360232642675391e-05,
      "loss": 0.0095,
      "step": 880
    },
    {
      "epoch": 0.969729987401682,
      "grad_norm": 0.31951704621315,
      "learning_rate": 1.3529625590694292e-05,
      "loss": 0.0148,
      "step": 890
    },
    {
      "epoch": 0.9806258299567572,
      "grad_norm": 0.009546870365738869,
      "learning_rate": 1.3456924754634679e-05,
      "loss": 0.0051,
      "step": 900
    },
    {
      "epoch": 0.9915216725118322,
      "grad_norm": 2.050363063812256,
      "learning_rate": 1.3384223918575065e-05,
      "loss": 0.0306,
      "step": 910
    },
    {
      "epoch": 1.0032687527665225,
      "grad_norm": 1.1950825452804565,
      "learning_rate": 1.331152308251545e-05,
      "loss": 0.0061,
      "step": 920
    },
    {
      "epoch": 1.0141645953215976,
      "grad_norm": 0.02007538639008999,
      "learning_rate": 1.3238822246455837e-05,
      "loss": 0.005,
      "step": 930
    },
    {
      "epoch": 1.0250604378766728,
      "grad_norm": 0.053643591701984406,
      "learning_rate": 1.316612141039622e-05,
      "loss": 0.0093,
      "step": 940
    },
    {
      "epoch": 1.0359562804317477,
      "grad_norm": 0.13197128474712372,
      "learning_rate": 1.3093420574336606e-05,
      "loss": 0.0123,
      "step": 950
    },
    {
      "epoch": 1.0468521229868228,
      "grad_norm": 0.20932506024837494,
      "learning_rate": 1.3020719738276992e-05,
      "loss": 0.0267,
      "step": 960
    },
    {
      "epoch": 1.057747965541898,
      "grad_norm": 0.11939968913793564,
      "learning_rate": 1.2948018902217377e-05,
      "loss": 0.0042,
      "step": 970
    },
    {
      "epoch": 1.068643808096973,
      "grad_norm": 0.08671363443136215,
      "learning_rate": 1.2875318066157762e-05,
      "loss": 0.009,
      "step": 980
    },
    {
      "epoch": 1.079539650652048,
      "grad_norm": 0.025082537904381752,
      "learning_rate": 1.2802617230098147e-05,
      "loss": 0.0028,
      "step": 990
    },
    {
      "epoch": 1.0904354932071232,
      "grad_norm": 0.005358474794775248,
      "learning_rate": 1.2729916394038533e-05,
      "loss": 0.0017,
      "step": 1000
    },
    {
      "epoch": 1.1013313357621983,
      "grad_norm": 0.008662994019687176,
      "learning_rate": 1.2657215557978916e-05,
      "loss": 0.0013,
      "step": 1010
    },
    {
      "epoch": 1.1122271783172732,
      "grad_norm": 2.0191564559936523,
      "learning_rate": 1.2584514721919303e-05,
      "loss": 0.0179,
      "step": 1020
    },
    {
      "epoch": 1.1231230208723484,
      "grad_norm": 0.025384988635778427,
      "learning_rate": 1.2511813885859689e-05,
      "loss": 0.02,
      "step": 1030
    },
    {
      "epoch": 1.1340188634274235,
      "grad_norm": 0.011868833564221859,
      "learning_rate": 1.2439113049800074e-05,
      "loss": 0.0024,
      "step": 1040
    },
    {
      "epoch": 1.1449147059824987,
      "grad_norm": 0.010154581628739834,
      "learning_rate": 1.236641221374046e-05,
      "loss": 0.0053,
      "step": 1050
    },
    {
      "epoch": 1.1558105485375736,
      "grad_norm": 0.09402716159820557,
      "learning_rate": 1.2293711377680843e-05,
      "loss": 0.005,
      "step": 1060
    },
    {
      "epoch": 1.1667063910926487,
      "grad_norm": 0.3972262442111969,
      "learning_rate": 1.222101054162123e-05,
      "loss": 0.0065,
      "step": 1070
    },
    {
      "epoch": 1.1776022336477239,
      "grad_norm": 0.02627560682594776,
      "learning_rate": 1.2148309705561614e-05,
      "loss": 0.0192,
      "step": 1080
    },
    {
      "epoch": 1.1884980762027988,
      "grad_norm": 0.538215160369873,
      "learning_rate": 1.2075608869502e-05,
      "loss": 0.0073,
      "step": 1090
    },
    {
      "epoch": 1.199393918757874,
      "grad_norm": 0.48226070404052734,
      "learning_rate": 1.2002908033442387e-05,
      "loss": 0.0009,
      "step": 1100
    },
    {
      "epoch": 1.210289761312949,
      "grad_norm": 0.5596455335617065,
      "learning_rate": 1.193020719738277e-05,
      "loss": 0.0119,
      "step": 1110
    },
    {
      "epoch": 1.2211856038680242,
      "grad_norm": 0.03299971669912338,
      "learning_rate": 1.1857506361323157e-05,
      "loss": 0.0025,
      "step": 1120
    },
    {
      "epoch": 1.2320814464230991,
      "grad_norm": 0.03791365772485733,
      "learning_rate": 1.1784805525263541e-05,
      "loss": 0.0147,
      "step": 1130
    },
    {
      "epoch": 1.2429772889781743,
      "grad_norm": 0.6537386178970337,
      "learning_rate": 1.1712104689203926e-05,
      "loss": 0.0026,
      "step": 1140
    },
    {
      "epoch": 1.2538731315332494,
      "grad_norm": 0.02327698841691017,
      "learning_rate": 1.1639403853144313e-05,
      "loss": 0.0012,
      "step": 1150
    },
    {
      "epoch": 1.2647689740883243,
      "grad_norm": 0.024980690330266953,
      "learning_rate": 1.1566703017084697e-05,
      "loss": 0.0053,
      "step": 1160
    },
    {
      "epoch": 1.2756648166433995,
      "grad_norm": 0.01306835189461708,
      "learning_rate": 1.1494002181025084e-05,
      "loss": 0.0179,
      "step": 1170
    },
    {
      "epoch": 1.2865606591984746,
      "grad_norm": 0.005500817205756903,
      "learning_rate": 1.1421301344965467e-05,
      "loss": 0.0117,
      "step": 1180
    },
    {
      "epoch": 1.2974565017535498,
      "grad_norm": 2.294457197189331,
      "learning_rate": 1.1348600508905853e-05,
      "loss": 0.0065,
      "step": 1190
    },
    {
      "epoch": 1.3083523443086247,
      "grad_norm": 3.2596099376678467,
      "learning_rate": 1.1275899672846238e-05,
      "loss": 0.0128,
      "step": 1200
    },
    {
      "epoch": 1.3192481868636998,
      "grad_norm": 0.014325232245028019,
      "learning_rate": 1.1203198836786624e-05,
      "loss": 0.004,
      "step": 1210
    },
    {
      "epoch": 1.330144029418775,
      "grad_norm": 0.08742561936378479,
      "learning_rate": 1.1130498000727011e-05,
      "loss": 0.005,
      "step": 1220
    },
    {
      "epoch": 1.3410398719738499,
      "grad_norm": 0.06310788542032242,
      "learning_rate": 1.1057797164667394e-05,
      "loss": 0.0062,
      "step": 1230
    },
    {
      "epoch": 1.351935714528925,
      "grad_norm": 0.02661961503326893,
      "learning_rate": 1.098509632860778e-05,
      "loss": 0.001,
      "step": 1240
    },
    {
      "epoch": 1.3628315570840002,
      "grad_norm": 0.008728576824069023,
      "learning_rate": 1.0912395492548165e-05,
      "loss": 0.0065,
      "step": 1250
    },
    {
      "epoch": 1.3737273996390753,
      "grad_norm": 0.40287479758262634,
      "learning_rate": 1.0839694656488552e-05,
      "loss": 0.0115,
      "step": 1260
    },
    {
      "epoch": 1.3846232421941502,
      "grad_norm": 0.0008290009573101997,
      "learning_rate": 1.0766993820428935e-05,
      "loss": 0.0023,
      "step": 1270
    },
    {
      "epoch": 1.3955190847492254,
      "grad_norm": 0.20154079794883728,
      "learning_rate": 1.0694292984369321e-05,
      "loss": 0.004,
      "step": 1280
    },
    {
      "epoch": 1.4064149273043005,
      "grad_norm": 0.032378897070884705,
      "learning_rate": 1.0621592148309707e-05,
      "loss": 0.0103,
      "step": 1290
    },
    {
      "epoch": 1.4173107698593754,
      "grad_norm": 0.037077393382787704,
      "learning_rate": 1.0548891312250092e-05,
      "loss": 0.0048,
      "step": 1300
    },
    {
      "epoch": 1.4282066124144506,
      "grad_norm": 0.0009527279180474579,
      "learning_rate": 1.0476190476190477e-05,
      "loss": 0.0197,
      "step": 1310
    },
    {
      "epoch": 1.4391024549695257,
      "grad_norm": 0.6460732221603394,
      "learning_rate": 1.0403489640130862e-05,
      "loss": 0.0085,
      "step": 1320
    },
    {
      "epoch": 1.4499982975246009,
      "grad_norm": 0.18065184354782104,
      "learning_rate": 1.0330788804071248e-05,
      "loss": 0.0021,
      "step": 1330
    },
    {
      "epoch": 1.4608941400796758,
      "grad_norm": 0.08325136452913284,
      "learning_rate": 1.0258087968011631e-05,
      "loss": 0.0079,
      "step": 1340
    },
    {
      "epoch": 1.471789982634751,
      "grad_norm": 0.0035695817787200212,
      "learning_rate": 1.0185387131952018e-05,
      "loss": 0.0001,
      "step": 1350
    },
    {
      "epoch": 1.482685825189826,
      "grad_norm": 0.00448552705347538,
      "learning_rate": 1.0112686295892404e-05,
      "loss": 0.0004,
      "step": 1360
    },
    {
      "epoch": 1.493581667744901,
      "grad_norm": 0.027783585712313652,
      "learning_rate": 1.0039985459832789e-05,
      "loss": 0.011,
      "step": 1370
    },
    {
      "epoch": 1.5044775102999761,
      "grad_norm": 2.4403154850006104,
      "learning_rate": 9.967284623773175e-06,
      "loss": 0.0162,
      "step": 1380
    },
    {
      "epoch": 1.5153733528550513,
      "grad_norm": 0.031121332198381424,
      "learning_rate": 9.89458378771356e-06,
      "loss": 0.0019,
      "step": 1390
    },
    {
      "epoch": 1.5262691954101264,
      "grad_norm": 0.01372817624360323,
      "learning_rate": 9.821882951653945e-06,
      "loss": 0.0107,
      "step": 1400
    },
    {
      "epoch": 1.5371650379652015,
      "grad_norm": 0.015296364203095436,
      "learning_rate": 9.74918211559433e-06,
      "loss": 0.0107,
      "step": 1410
    },
    {
      "epoch": 1.5480608805202765,
      "grad_norm": 0.022742554545402527,
      "learning_rate": 9.676481279534716e-06,
      "loss": 0.0055,
      "step": 1420
    },
    {
      "epoch": 1.5589567230753516,
      "grad_norm": 0.005425534211099148,
      "learning_rate": 9.6037804434751e-06,
      "loss": 0.001,
      "step": 1430
    },
    {
      "epoch": 1.5698525656304265,
      "grad_norm": 0.0004977713688276708,
      "learning_rate": 9.531079607415487e-06,
      "loss": 0.0015,
      "step": 1440
    },
    {
      "epoch": 1.5807484081855017,
      "grad_norm": 0.016388392075896263,
      "learning_rate": 9.458378771355872e-06,
      "loss": 0.0213,
      "step": 1450
    },
    {
      "epoch": 1.5916442507405768,
      "grad_norm": 0.029239172115921974,
      "learning_rate": 9.385677935296256e-06,
      "loss": 0.0032,
      "step": 1460
    },
    {
      "epoch": 1.602540093295652,
      "grad_norm": 0.25184109807014465,
      "learning_rate": 9.312977099236641e-06,
      "loss": 0.0139,
      "step": 1470
    },
    {
      "epoch": 1.613435935850727,
      "grad_norm": 0.5452978014945984,
      "learning_rate": 9.240276263177028e-06,
      "loss": 0.001,
      "step": 1480
    },
    {
      "epoch": 1.624331778405802,
      "grad_norm": 0.00713045010343194,
      "learning_rate": 9.167575427117412e-06,
      "loss": 0.0068,
      "step": 1490
    },
    {
      "epoch": 1.6352276209608771,
      "grad_norm": 0.04856117442250252,
      "learning_rate": 9.094874591057799e-06,
      "loss": 0.013,
      "step": 1500
    },
    {
      "epoch": 1.646123463515952,
      "grad_norm": 0.6631866693496704,
      "learning_rate": 9.022173754998184e-06,
      "loss": 0.0118,
      "step": 1510
    },
    {
      "epoch": 1.6570193060710272,
      "grad_norm": 0.34849047660827637,
      "learning_rate": 8.949472918938568e-06,
      "loss": 0.004,
      "step": 1520
    },
    {
      "epoch": 1.6679151486261024,
      "grad_norm": 0.011874212883412838,
      "learning_rate": 8.876772082878955e-06,
      "loss": 0.002,
      "step": 1530
    },
    {
      "epoch": 1.6788109911811775,
      "grad_norm": 0.05654163286089897,
      "learning_rate": 8.80407124681934e-06,
      "loss": 0.0033,
      "step": 1540
    },
    {
      "epoch": 1.6897068337362526,
      "grad_norm": 0.05505364388227463,
      "learning_rate": 8.731370410759724e-06,
      "loss": 0.0016,
      "step": 1550
    },
    {
      "epoch": 1.7006026762913276,
      "grad_norm": 0.8052054047584534,
      "learning_rate": 8.658669574700109e-06,
      "loss": 0.0033,
      "step": 1560
    },
    {
      "epoch": 1.7114985188464027,
      "grad_norm": 0.001815033028833568,
      "learning_rate": 8.585968738640495e-06,
      "loss": 0.0026,
      "step": 1570
    },
    {
      "epoch": 1.7223943614014776,
      "grad_norm": 0.17480531334877014,
      "learning_rate": 8.51326790258088e-06,
      "loss": 0.0064,
      "step": 1580
    },
    {
      "epoch": 1.7332902039565528,
      "grad_norm": 0.005486777517944574,
      "learning_rate": 8.440567066521266e-06,
      "loss": 0.0208,
      "step": 1590
    },
    {
      "epoch": 1.744186046511628,
      "grad_norm": 0.10310015082359314,
      "learning_rate": 8.367866230461651e-06,
      "loss": 0.0005,
      "step": 1600
    },
    {
      "epoch": 1.755081889066703,
      "grad_norm": 0.008104170672595501,
      "learning_rate": 8.295165394402036e-06,
      "loss": 0.0087,
      "step": 1610
    },
    {
      "epoch": 1.7659777316217782,
      "grad_norm": 0.033456411212682724,
      "learning_rate": 8.22246455834242e-06,
      "loss": 0.0072,
      "step": 1620
    },
    {
      "epoch": 1.776873574176853,
      "grad_norm": 0.007005383726209402,
      "learning_rate": 8.149763722282807e-06,
      "loss": 0.014,
      "step": 1630
    },
    {
      "epoch": 1.7877694167319282,
      "grad_norm": 0.012260228395462036,
      "learning_rate": 8.077062886223192e-06,
      "loss": 0.0008,
      "step": 1640
    },
    {
      "epoch": 1.7986652592870032,
      "grad_norm": 0.0009957356378436089,
      "learning_rate": 8.004362050163578e-06,
      "loss": 0.0014,
      "step": 1650
    },
    {
      "epoch": 1.8095611018420783,
      "grad_norm": 0.005955096334218979,
      "learning_rate": 7.931661214103963e-06,
      "loss": 0.0005,
      "step": 1660
    },
    {
      "epoch": 1.8204569443971534,
      "grad_norm": 0.0004700123390648514,
      "learning_rate": 7.858960378044348e-06,
      "loss": 0.0028,
      "step": 1670
    },
    {
      "epoch": 1.8313527869522286,
      "grad_norm": 0.002416003029793501,
      "learning_rate": 7.786259541984733e-06,
      "loss": 0.0003,
      "step": 1680
    },
    {
      "epoch": 1.8422486295073037,
      "grad_norm": 0.028112288564443588,
      "learning_rate": 7.713558705925119e-06,
      "loss": 0.0318,
      "step": 1690
    },
    {
      "epoch": 1.8531444720623786,
      "grad_norm": 0.03914355859160423,
      "learning_rate": 7.640857869865504e-06,
      "loss": 0.0139,
      "step": 1700
    },
    {
      "epoch": 1.8640403146174538,
      "grad_norm": 4.869634628295898,
      "learning_rate": 7.568157033805889e-06,
      "loss": 0.0098,
      "step": 1710
    },
    {
      "epoch": 1.8749361571725287,
      "grad_norm": 1.1335488557815552,
      "learning_rate": 7.495456197746275e-06,
      "loss": 0.0174,
      "step": 1720
    },
    {
      "epoch": 1.8858319997276038,
      "grad_norm": 0.6747786402702332,
      "learning_rate": 7.42275536168666e-06,
      "loss": 0.0044,
      "step": 1730
    },
    {
      "epoch": 1.896727842282679,
      "grad_norm": 0.9970724582672119,
      "learning_rate": 7.350054525627045e-06,
      "loss": 0.0087,
      "step": 1740
    },
    {
      "epoch": 1.9076236848377541,
      "grad_norm": 0.16893063485622406,
      "learning_rate": 7.27735368956743e-06,
      "loss": 0.0032,
      "step": 1750
    },
    {
      "epoch": 1.9185195273928293,
      "grad_norm": 0.8119887709617615,
      "learning_rate": 7.204652853507816e-06,
      "loss": 0.0153,
      "step": 1760
    },
    {
      "epoch": 1.9294153699479044,
      "grad_norm": 0.006383243482559919,
      "learning_rate": 7.131952017448202e-06,
      "loss": 0.0034,
      "step": 1770
    },
    {
      "epoch": 1.9403112125029793,
      "grad_norm": 0.03637854754924774,
      "learning_rate": 7.059251181388587e-06,
      "loss": 0.0034,
      "step": 1780
    },
    {
      "epoch": 1.9512070550580543,
      "grad_norm": 0.04712774232029915,
      "learning_rate": 6.9865503453289714e-06,
      "loss": 0.0234,
      "step": 1790
    },
    {
      "epoch": 1.9621028976131294,
      "grad_norm": 6.268856525421143,
      "learning_rate": 6.913849509269357e-06,
      "loss": 0.0265,
      "step": 1800
    },
    {
      "epoch": 1.9729987401682045,
      "grad_norm": 0.6448054313659668,
      "learning_rate": 6.841148673209742e-06,
      "loss": 0.0057,
      "step": 1810
    },
    {
      "epoch": 1.9838945827232797,
      "grad_norm": 0.07000619918107986,
      "learning_rate": 6.768447837150128e-06,
      "loss": 0.0005,
      "step": 1820
    },
    {
      "epoch": 1.9947904252783548,
      "grad_norm": 0.012424224987626076,
      "learning_rate": 6.695747001090514e-06,
      "loss": 0.0039,
      "step": 1830
    },
    {
      "epoch": 2.006537505533045,
      "grad_norm": 0.08453727513551712,
      "learning_rate": 6.6230461650308985e-06,
      "loss": 0.0006,
      "step": 1840
    },
    {
      "epoch": 2.01743334808812,
      "grad_norm": 0.0390053391456604,
      "learning_rate": 6.550345328971284e-06,
      "loss": 0.0006,
      "step": 1850
    },
    {
      "epoch": 2.0283291906431953,
      "grad_norm": 0.013394408859312534,
      "learning_rate": 6.477644492911669e-06,
      "loss": 0.0049,
      "step": 1860
    },
    {
      "epoch": 2.0392250331982704,
      "grad_norm": 0.0027593837585300207,
      "learning_rate": 6.404943656852054e-06,
      "loss": 0.0008,
      "step": 1870
    },
    {
      "epoch": 2.0501208757533456,
      "grad_norm": 0.0010020197369158268,
      "learning_rate": 6.332242820792439e-06,
      "loss": 0.0023,
      "step": 1880
    },
    {
      "epoch": 2.0610167183084203,
      "grad_norm": 0.0010899041080847383,
      "learning_rate": 6.259541984732826e-06,
      "loss": 0.0005,
      "step": 1890
    },
    {
      "epoch": 2.0719125608634954,
      "grad_norm": 0.03333039954304695,
      "learning_rate": 6.18684114867321e-06,
      "loss": 0.0011,
      "step": 1900
    },
    {
      "epoch": 2.0828084034185705,
      "grad_norm": 0.002606542780995369,
      "learning_rate": 6.114140312613596e-06,
      "loss": 0.0062,
      "step": 1910
    },
    {
      "epoch": 2.0937042459736457,
      "grad_norm": 0.008523502387106419,
      "learning_rate": 6.041439476553981e-06,
      "loss": 0.0001,
      "step": 1920
    },
    {
      "epoch": 2.104600088528721,
      "grad_norm": 0.005313311703503132,
      "learning_rate": 5.968738640494366e-06,
      "loss": 0.0095,
      "step": 1930
    },
    {
      "epoch": 2.115495931083796,
      "grad_norm": 0.030115563422441483,
      "learning_rate": 5.896037804434751e-06,
      "loss": 0.0011,
      "step": 1940
    },
    {
      "epoch": 2.126391773638871,
      "grad_norm": 0.001531143207103014,
      "learning_rate": 5.823336968375137e-06,
      "loss": 0.0047,
      "step": 1950
    },
    {
      "epoch": 2.137287616193946,
      "grad_norm": 0.013100974261760712,
      "learning_rate": 5.750636132315522e-06,
      "loss": 0.0041,
      "step": 1960
    },
    {
      "epoch": 2.148183458749021,
      "grad_norm": 0.010219580493867397,
      "learning_rate": 5.677935296255908e-06,
      "loss": 0.0012,
      "step": 1970
    },
    {
      "epoch": 2.159079301304096,
      "grad_norm": 0.02304321527481079,
      "learning_rate": 5.6052344601962925e-06,
      "loss": 0.0006,
      "step": 1980
    },
    {
      "epoch": 2.1699751438591712,
      "grad_norm": 0.32716256380081177,
      "learning_rate": 5.532533624136678e-06,
      "loss": 0.0005,
      "step": 1990
    },
    {
      "epoch": 2.1808709864142464,
      "grad_norm": 0.003199178259819746,
      "learning_rate": 5.459832788077063e-06,
      "loss": 0.0002,
      "step": 2000
    },
    {
      "epoch": 2.1917668289693215,
      "grad_norm": 0.10407451540231705,
      "learning_rate": 5.387131952017448e-06,
      "loss": 0.0026,
      "step": 2010
    },
    {
      "epoch": 2.2026626715243967,
      "grad_norm": 0.0036433066707104445,
      "learning_rate": 5.314431115957834e-06,
      "loss": 0.0053,
      "step": 2020
    },
    {
      "epoch": 2.2135585140794714,
      "grad_norm": 0.22139491140842438,
      "learning_rate": 5.2417302798982195e-06,
      "loss": 0.0013,
      "step": 2030
    },
    {
      "epoch": 2.2244543566345465,
      "grad_norm": 0.00901265349239111,
      "learning_rate": 5.169029443838604e-06,
      "loss": 0.0004,
      "step": 2040
    },
    {
      "epoch": 2.2353501991896216,
      "grad_norm": 0.007596256677061319,
      "learning_rate": 5.09632860777899e-06,
      "loss": 0.0002,
      "step": 2050
    },
    {
      "epoch": 2.2462460417446968,
      "grad_norm": 0.05308268591761589,
      "learning_rate": 5.023627771719375e-06,
      "loss": 0.0001,
      "step": 2060
    },
    {
      "epoch": 2.257141884299772,
      "grad_norm": 0.005023419391363859,
      "learning_rate": 4.95092693565976e-06,
      "loss": 0.0001,
      "step": 2070
    },
    {
      "epoch": 2.268037726854847,
      "grad_norm": 0.09251435101032257,
      "learning_rate": 4.878226099600146e-06,
      "loss": 0.0008,
      "step": 2080
    },
    {
      "epoch": 2.278933569409922,
      "grad_norm": 0.0035660325083881617,
      "learning_rate": 4.8055252635405305e-06,
      "loss": 0.0029,
      "step": 2090
    },
    {
      "epoch": 2.2898294119649973,
      "grad_norm": 0.00022365724726114422,
      "learning_rate": 4.732824427480917e-06,
      "loss": 0.0,
      "step": 2100
    },
    {
      "epoch": 2.300725254520072,
      "grad_norm": 0.28966161608695984,
      "learning_rate": 4.660123591421302e-06,
      "loss": 0.0004,
      "step": 2110
    },
    {
      "epoch": 2.311621097075147,
      "grad_norm": 0.000494773150421679,
      "learning_rate": 4.5874227553616864e-06,
      "loss": 0.0003,
      "step": 2120
    },
    {
      "epoch": 2.3225169396302223,
      "grad_norm": 0.2110077142715454,
      "learning_rate": 4.514721919302073e-06,
      "loss": 0.0007,
      "step": 2130
    },
    {
      "epoch": 2.3334127821852975,
      "grad_norm": 0.0006416022079065442,
      "learning_rate": 4.442021083242458e-06,
      "loss": 0.0006,
      "step": 2140
    },
    {
      "epoch": 2.3443086247403726,
      "grad_norm": 0.0005581114673987031,
      "learning_rate": 4.369320247182842e-06,
      "loss": 0.0004,
      "step": 2150
    },
    {
      "epoch": 2.3552044672954477,
      "grad_norm": 0.0006430571665987372,
      "learning_rate": 4.296619411123229e-06,
      "loss": 0.0013,
      "step": 2160
    },
    {
      "epoch": 2.3661003098505224,
      "grad_norm": 0.0002313524018973112,
      "learning_rate": 4.2239185750636135e-06,
      "loss": 0.0011,
      "step": 2170
    },
    {
      "epoch": 2.3769961524055976,
      "grad_norm": 0.01299639604985714,
      "learning_rate": 4.151217739003999e-06,
      "loss": 0.0002,
      "step": 2180
    },
    {
      "epoch": 2.3878919949606727,
      "grad_norm": 0.036279868334531784,
      "learning_rate": 4.078516902944385e-06,
      "loss": 0.0,
      "step": 2190
    },
    {
      "epoch": 2.398787837515748,
      "grad_norm": 0.0004496763285715133,
      "learning_rate": 4.005816066884769e-06,
      "loss": 0.0,
      "step": 2200
    },
    {
      "epoch": 2.409683680070823,
      "grad_norm": 0.010034661740064621,
      "learning_rate": 3.933115230825155e-06,
      "loss": 0.0,
      "step": 2210
    },
    {
      "epoch": 2.420579522625898,
      "grad_norm": 0.0027114665135741234,
      "learning_rate": 3.860414394765541e-06,
      "loss": 0.0,
      "step": 2220
    },
    {
      "epoch": 2.4314753651809733,
      "grad_norm": 0.00021306249254848808,
      "learning_rate": 3.7877135587059253e-06,
      "loss": 0.0,
      "step": 2230
    },
    {
      "epoch": 2.4423712077360484,
      "grad_norm": 0.002327492693439126,
      "learning_rate": 3.7150127226463105e-06,
      "loss": 0.0,
      "step": 2240
    },
    {
      "epoch": 2.453267050291123,
      "grad_norm": 0.0042752730660140514,
      "learning_rate": 3.6423118865866965e-06,
      "loss": 0.0001,
      "step": 2250
    },
    {
      "epoch": 2.4641628928461983,
      "grad_norm": 0.5819891691207886,
      "learning_rate": 3.5696110505270817e-06,
      "loss": 0.0014,
      "step": 2260
    },
    {
      "epoch": 2.4750587354012734,
      "grad_norm": 0.0002232871629530564,
      "learning_rate": 3.4969102144674664e-06,
      "loss": 0.0,
      "step": 2270
    },
    {
      "epoch": 2.4859545779563486,
      "grad_norm": 0.0006547856028191745,
      "learning_rate": 3.4242093784078516e-06,
      "loss": 0.0,
      "step": 2280
    },
    {
      "epoch": 2.4968504205114237,
      "grad_norm": 0.007096582092344761,
      "learning_rate": 3.3515085423482376e-06,
      "loss": 0.0,
      "step": 2290
    },
    {
      "epoch": 2.507746263066499,
      "grad_norm": 0.007319641765207052,
      "learning_rate": 3.2788077062886227e-06,
      "loss": 0.0,
      "step": 2300
    },
    {
      "epoch": 2.5186421056215735,
      "grad_norm": 0.00013177268556319177,
      "learning_rate": 3.206106870229008e-06,
      "loss": 0.0,
      "step": 2310
    },
    {
      "epoch": 2.5295379481766487,
      "grad_norm": 0.001638653688132763,
      "learning_rate": 3.1334060341693935e-06,
      "loss": 0.0002,
      "step": 2320
    },
    {
      "epoch": 2.540433790731724,
      "grad_norm": 0.00048312891158275306,
      "learning_rate": 3.0607051981097786e-06,
      "loss": 0.0,
      "step": 2330
    },
    {
      "epoch": 2.551329633286799,
      "grad_norm": 0.001063148258253932,
      "learning_rate": 2.988004362050164e-06,
      "loss": 0.0001,
      "step": 2340
    },
    {
      "epoch": 2.562225475841874,
      "grad_norm": 0.005976190324872732,
      "learning_rate": 2.9153035259905494e-06,
      "loss": 0.0,
      "step": 2350
    },
    {
      "epoch": 2.5731213183969492,
      "grad_norm": 0.001030449871905148,
      "learning_rate": 2.8426026899309345e-06,
      "loss": 0.0001,
      "step": 2360
    },
    {
      "epoch": 2.5840171609520244,
      "grad_norm": 0.000677391595672816,
      "learning_rate": 2.7699018538713197e-06,
      "loss": 0.0016,
      "step": 2370
    },
    {
      "epoch": 2.5949130035070995,
      "grad_norm": 1.1224867105484009,
      "learning_rate": 2.6972010178117053e-06,
      "loss": 0.0036,
      "step": 2380
    },
    {
      "epoch": 2.6058088460621747,
      "grad_norm": 0.0026874279137700796,
      "learning_rate": 2.6245001817520905e-06,
      "loss": 0.0,
      "step": 2390
    },
    {
      "epoch": 2.6167046886172494,
      "grad_norm": 0.003862058976665139,
      "learning_rate": 2.5517993456924756e-06,
      "loss": 0.0001,
      "step": 2400
    },
    {
      "epoch": 2.6276005311723245,
      "grad_norm": 0.0830313041806221,
      "learning_rate": 2.4790985096328608e-06,
      "loss": 0.0014,
      "step": 2410
    },
    {
      "epoch": 2.6384963737273996,
      "grad_norm": 0.0019621718674898148,
      "learning_rate": 2.4063976735732464e-06,
      "loss": 0.0005,
      "step": 2420
    },
    {
      "epoch": 2.649392216282475,
      "grad_norm": 0.28306806087493896,
      "learning_rate": 2.3336968375136315e-06,
      "loss": 0.0002,
      "step": 2430
    },
    {
      "epoch": 2.66028805883755,
      "grad_norm": 0.004503046162426472,
      "learning_rate": 2.260996001454017e-06,
      "loss": 0.0,
      "step": 2440
    },
    {
      "epoch": 2.6711839013926246,
      "grad_norm": 0.0008729721885174513,
      "learning_rate": 2.1882951653944023e-06,
      "loss": 0.0008,
      "step": 2450
    },
    {
      "epoch": 2.6820797439476998,
      "grad_norm": 0.010283468291163445,
      "learning_rate": 2.1155943293347874e-06,
      "loss": 0.0,
      "step": 2460
    },
    {
      "epoch": 2.692975586502775,
      "grad_norm": 1.8014414308709092e-05,
      "learning_rate": 2.042893493275173e-06,
      "loss": 0.0012,
      "step": 2470
    },
    {
      "epoch": 2.70387142905785,
      "grad_norm": 0.0013227862073108554,
      "learning_rate": 1.970192657215558e-06,
      "loss": 0.0001,
      "step": 2480
    },
    {
      "epoch": 2.714767271612925,
      "grad_norm": 9.750492608873174e-05,
      "learning_rate": 1.8974918211559433e-06,
      "loss": 0.0012,
      "step": 2490
    },
    {
      "epoch": 2.7256631141680003,
      "grad_norm": 0.009569020941853523,
      "learning_rate": 1.824790985096329e-06,
      "loss": 0.0001,
      "step": 2500
    },
    {
      "epoch": 2.7365589567230755,
      "grad_norm": 0.00015347945736721158,
      "learning_rate": 1.752090149036714e-06,
      "loss": 0.0001,
      "step": 2510
    },
    {
      "epoch": 2.7474547992781506,
      "grad_norm": 0.0024864268489181995,
      "learning_rate": 1.6793893129770995e-06,
      "loss": 0.0002,
      "step": 2520
    },
    {
      "epoch": 2.7583506418332258,
      "grad_norm": 0.0018065335461869836,
      "learning_rate": 1.6066884769174848e-06,
      "loss": 0.0,
      "step": 2530
    },
    {
      "epoch": 2.7692464843883005,
      "grad_norm": 0.000252872530836612,
      "learning_rate": 1.53398764085787e-06,
      "loss": 0.0002,
      "step": 2540
    },
    {
      "epoch": 2.7801423269433756,
      "grad_norm": 0.0006220173672772944,
      "learning_rate": 1.4612868047982554e-06,
      "loss": 0.0,
      "step": 2550
    },
    {
      "epoch": 2.7910381694984507,
      "grad_norm": 0.00021657197794411331,
      "learning_rate": 1.3885859687386405e-06,
      "loss": 0.002,
      "step": 2560
    },
    {
      "epoch": 2.801934012053526,
      "grad_norm": 0.062267255038022995,
      "learning_rate": 1.315885132679026e-06,
      "loss": 0.0001,
      "step": 2570
    },
    {
      "epoch": 2.812829854608601,
      "grad_norm": 0.00383751024492085,
      "learning_rate": 1.2431842966194113e-06,
      "loss": 0.0002,
      "step": 2580
    },
    {
      "epoch": 2.8237256971636757,
      "grad_norm": 9.788275929167867e-05,
      "learning_rate": 1.1704834605597967e-06,
      "loss": 0.0006,
      "step": 2590
    },
    {
      "epoch": 2.834621539718751,
      "grad_norm": 0.0013275217497721314,
      "learning_rate": 1.0977826245001818e-06,
      "loss": 0.0002,
      "step": 2600
    },
    {
      "epoch": 2.845517382273826,
      "grad_norm": 0.0015028759371489286,
      "learning_rate": 1.0250817884405672e-06,
      "loss": 0.0,
      "step": 2610
    },
    {
      "epoch": 2.856413224828901,
      "grad_norm": 0.00014119225670583546,
      "learning_rate": 9.523809523809525e-07,
      "loss": 0.0,
      "step": 2620
    },
    {
      "epoch": 2.8673090673839763,
      "grad_norm": 0.007295021787285805,
      "learning_rate": 8.796801163213378e-07,
      "loss": 0.0,
      "step": 2630
    },
    {
      "epoch": 2.8782049099390514,
      "grad_norm": 2.5996017939178273e-05,
      "learning_rate": 8.069792802617231e-07,
      "loss": 0.0001,
      "step": 2640
    },
    {
      "epoch": 2.8891007524941266,
      "grad_norm": 0.00027592500555329025,
      "learning_rate": 7.342784442021084e-07,
      "loss": 0.0001,
      "step": 2650
    },
    {
      "epoch": 2.8999965950492017,
      "grad_norm": 0.0033551298547536135,
      "learning_rate": 6.615776081424936e-07,
      "loss": 0.0,
      "step": 2660
    },
    {
      "epoch": 2.910892437604277,
      "grad_norm": 0.0005961539573036134,
      "learning_rate": 5.88876772082879e-07,
      "loss": 0.0,
      "step": 2670
    },
    {
      "epoch": 2.9217882801593515,
      "grad_norm": 0.0015423846198245883,
      "learning_rate": 5.161759360232643e-07,
      "loss": 0.0003,
      "step": 2680
    },
    {
      "epoch": 2.9326841227144267,
      "grad_norm": 0.000448063132353127,
      "learning_rate": 4.434750999636496e-07,
      "loss": 0.0031,
      "step": 2690
    },
    {
      "epoch": 2.943579965269502,
      "grad_norm": 0.003001452423632145,
      "learning_rate": 3.7077426390403497e-07,
      "loss": 0.0,
      "step": 2700
    },
    {
      "epoch": 2.954475807824577,
      "grad_norm": 4.6965491492301226e-05,
      "learning_rate": 2.9807342784442023e-07,
      "loss": 0.0001,
      "step": 2710
    },
    {
      "epoch": 2.965371650379652,
      "grad_norm": 0.00013006600784137845,
      "learning_rate": 2.2537259178480555e-07,
      "loss": 0.001,
      "step": 2720
    },
    {
      "epoch": 2.9762674929347273,
      "grad_norm": 0.006912072654813528,
      "learning_rate": 1.5267175572519085e-07,
      "loss": 0.0,
      "step": 2730
    },
    {
      "epoch": 2.987163335489802,
      "grad_norm": 0.0006019837455824018,
      "learning_rate": 7.997091966557616e-08,
      "loss": 0.0,
      "step": 2740
    },
    {
      "epoch": 2.998059178044877,
      "grad_norm": 0.006343195680528879,
      "learning_rate": 7.2700836059614684e-09,
      "loss": 0.0005,
      "step": 2750
    }
  ],
| "logging_steps": 10, | |
| "max_steps": 2751, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1303954889740124e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |