| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.25, |
| "eval_steps": 500, |
| "global_step": 1500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "grad_norm": 0.16436488926410675, |
| "learning_rate": 3e-06, |
| "loss": 1.2626, |
| "step": 10 |
| }, |
| { |
| "grad_norm": 0.17262399196624756, |
| "learning_rate": 6.333333333333334e-06, |
| "loss": 1.2533, |
| "step": 20 |
| }, |
| { |
| "grad_norm": 0.296794056892395, |
| "learning_rate": 9.666666666666667e-06, |
| "loss": 1.2115, |
| "step": 30 |
| }, |
| { |
| "grad_norm": 0.18189626932144165, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 1.1331, |
| "step": 40 |
| }, |
| { |
| "grad_norm": 0.19689704477787018, |
| "learning_rate": 1.6333333333333335e-05, |
| "loss": 1.0918, |
| "step": 50 |
| }, |
| { |
| "grad_norm": 0.33070850372314453, |
| "learning_rate": 1.9666666666666666e-05, |
| "loss": 1.075, |
| "step": 60 |
| }, |
| { |
| "grad_norm": 0.18935681879520416, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 1.0615, |
| "step": 70 |
| }, |
| { |
| "grad_norm": 0.38514193892478943, |
| "learning_rate": 2.633333333333333e-05, |
| "loss": 1.0459, |
| "step": 80 |
| }, |
| { |
| "grad_norm": 0.27131959795951843, |
| "learning_rate": 2.9666666666666672e-05, |
| "loss": 1.0219, |
| "step": 90 |
| }, |
| { |
| "grad_norm": 0.5233219265937805, |
| "learning_rate": 3.3e-05, |
| "loss": 0.9815, |
| "step": 100 |
| }, |
| { |
| "grad_norm": 0.3682015836238861, |
| "learning_rate": 3.633333333333333e-05, |
| "loss": 0.9351, |
| "step": 110 |
| }, |
| { |
| "grad_norm": 0.5758795738220215, |
| "learning_rate": 3.966666666666667e-05, |
| "loss": 0.8842, |
| "step": 120 |
| }, |
| { |
| "grad_norm": 0.9897364974021912, |
| "learning_rate": 4.3e-05, |
| "loss": 0.8308, |
| "step": 130 |
| }, |
| { |
| "grad_norm": 0.6590216755867004, |
| "learning_rate": 4.633333333333333e-05, |
| "loss": 0.7733, |
| "step": 140 |
| }, |
| { |
| "grad_norm": 0.850604236125946, |
| "learning_rate": 4.966666666666667e-05, |
| "loss": 0.7366, |
| "step": 150 |
| }, |
| { |
| "grad_norm": 0.7046625018119812, |
| "learning_rate": 5.300000000000001e-05, |
| "loss": 0.6911, |
| "step": 160 |
| }, |
| { |
| "grad_norm": 0.8093236684799194, |
| "learning_rate": 5.633333333333334e-05, |
| "loss": 0.6561, |
| "step": 170 |
| }, |
| { |
| "grad_norm": 1.1920536756515503, |
| "learning_rate": 5.966666666666667e-05, |
| "loss": 0.624, |
| "step": 180 |
| }, |
| { |
| "grad_norm": 0.8813220858573914, |
| "learning_rate": 6.3e-05, |
| "loss": 0.6004, |
| "step": 190 |
| }, |
| { |
| "grad_norm": 0.9447306394577026, |
| "learning_rate": 6.633333333333334e-05, |
| "loss": 0.5778, |
| "step": 200 |
| }, |
| { |
| "grad_norm": 1.016005039215088, |
| "learning_rate": 6.966666666666668e-05, |
| "loss": 0.5577, |
| "step": 210 |
| }, |
| { |
| "grad_norm": 0.9053665995597839, |
| "learning_rate": 7.3e-05, |
| "loss": 0.541, |
| "step": 220 |
| }, |
| { |
| "grad_norm": 1.1023659706115723, |
| "learning_rate": 7.633333333333334e-05, |
| "loss": 0.516, |
| "step": 230 |
| }, |
| { |
| "grad_norm": 1.2229526042938232, |
| "learning_rate": 7.966666666666666e-05, |
| "loss": 0.4996, |
| "step": 240 |
| }, |
| { |
| "grad_norm": 1.1556382179260254, |
| "learning_rate": 8.3e-05, |
| "loss": 0.4822, |
| "step": 250 |
| }, |
| { |
| "grad_norm": 1.349492073059082, |
| "learning_rate": 8.633333333333334e-05, |
| "loss": 0.4579, |
| "step": 260 |
| }, |
| { |
| "grad_norm": 1.251599669456482, |
| "learning_rate": 8.966666666666666e-05, |
| "loss": 0.4291, |
| "step": 270 |
| }, |
| { |
| "grad_norm": 1.1909986734390259, |
| "learning_rate": 9.300000000000001e-05, |
| "loss": 0.4068, |
| "step": 280 |
| }, |
| { |
| "grad_norm": 1.2850487232208252, |
| "learning_rate": 9.633333333333335e-05, |
| "loss": 0.3932, |
| "step": 290 |
| }, |
| { |
| "grad_norm": 1.3327720165252686, |
| "learning_rate": 9.966666666666667e-05, |
| "loss": 0.3716, |
| "step": 300 |
| }, |
| { |
| "grad_norm": 1.6960358619689941, |
| "learning_rate": 9.999938485971279e-05, |
| "loss": 0.3564, |
| "step": 310 |
| }, |
| { |
| "grad_norm": 1.3837307691574097, |
| "learning_rate": 9.999725846827562e-05, |
| "loss": 0.3295, |
| "step": 320 |
| }, |
| { |
| "grad_norm": 0.9719492793083191, |
| "learning_rate": 9.999361329594254e-05, |
| "loss": 0.3078, |
| "step": 330 |
| }, |
| { |
| "grad_norm": 1.0894017219543457, |
| "learning_rate": 9.998844945344405e-05, |
| "loss": 0.2905, |
| "step": 340 |
| }, |
| { |
| "grad_norm": 1.0385677814483643, |
| "learning_rate": 9.99817670976436e-05, |
| "loss": 0.2703, |
| "step": 350 |
| }, |
| { |
| "grad_norm": 1.2726777791976929, |
| "learning_rate": 9.997356643153303e-05, |
| "loss": 0.2523, |
| "step": 360 |
| }, |
| { |
| "grad_norm": 1.377550482749939, |
| "learning_rate": 9.996384770422629e-05, |
| "loss": 0.2416, |
| "step": 370 |
| }, |
| { |
| "grad_norm": 1.5664293766021729, |
| "learning_rate": 9.995261121095194e-05, |
| "loss": 0.2256, |
| "step": 380 |
| }, |
| { |
| "grad_norm": 1.2346878051757812, |
| "learning_rate": 9.993985729304408e-05, |
| "loss": 0.1967, |
| "step": 390 |
| }, |
| { |
| "grad_norm": 1.0488691329956055, |
| "learning_rate": 9.992558633793212e-05, |
| "loss": 0.1839, |
| "step": 400 |
| }, |
| { |
| "grad_norm": 1.8018057346343994, |
| "learning_rate": 9.990979877912891e-05, |
| "loss": 0.1722, |
| "step": 410 |
| }, |
| { |
| "grad_norm": 1.0807803869247437, |
| "learning_rate": 9.989249509621759e-05, |
| "loss": 0.1529, |
| "step": 420 |
| }, |
| { |
| "grad_norm": 1.1561813354492188, |
| "learning_rate": 9.987367581483705e-05, |
| "loss": 0.144, |
| "step": 430 |
| }, |
| { |
| "grad_norm": 1.4849426746368408, |
| "learning_rate": 9.985334150666592e-05, |
| "loss": 0.1278, |
| "step": 440 |
| }, |
| { |
| "grad_norm": 1.3472307920455933, |
| "learning_rate": 9.983149278940526e-05, |
| "loss": 0.1207, |
| "step": 450 |
| }, |
| { |
| "grad_norm": 1.3177121877670288, |
| "learning_rate": 9.980813032675974e-05, |
| "loss": 0.1188, |
| "step": 460 |
| }, |
| { |
| "grad_norm": 1.1293466091156006, |
| "learning_rate": 9.978325482841753e-05, |
| "loss": 0.108, |
| "step": 470 |
| }, |
| { |
| "grad_norm": 1.2579174041748047, |
| "learning_rate": 9.975686705002867e-05, |
| "loss": 0.0956, |
| "step": 480 |
| }, |
| { |
| "grad_norm": 1.0814399719238281, |
| "learning_rate": 9.972896779318219e-05, |
| "loss": 0.0928, |
| "step": 490 |
| }, |
| { |
| "grad_norm": 1.103074312210083, |
| "learning_rate": 9.969955790538175e-05, |
| "loss": 0.0919, |
| "step": 500 |
| }, |
| { |
| "grad_norm": 1.310774564743042, |
| "learning_rate": 9.966863828001982e-05, |
| "loss": 0.0948, |
| "step": 510 |
| }, |
| { |
| "grad_norm": 1.3567174673080444, |
| "learning_rate": 9.963620985635065e-05, |
| "loss": 0.0905, |
| "step": 520 |
| }, |
| { |
| "grad_norm": 1.0070871114730835, |
| "learning_rate": 9.960227361946164e-05, |
| "loss": 0.093, |
| "step": 530 |
| }, |
| { |
| "grad_norm": 1.145613431930542, |
| "learning_rate": 9.95668306002435e-05, |
| "loss": 0.0951, |
| "step": 540 |
| }, |
| { |
| "grad_norm": 0.9971445202827454, |
| "learning_rate": 9.952988187535886e-05, |
| "loss": 0.094, |
| "step": 550 |
| }, |
| { |
| "grad_norm": 1.1573495864868164, |
| "learning_rate": 9.949142856720961e-05, |
| "loss": 0.0842, |
| "step": 560 |
| }, |
| { |
| "grad_norm": 1.1136912107467651, |
| "learning_rate": 9.945147184390278e-05, |
| "loss": 0.0855, |
| "step": 570 |
| }, |
| { |
| "grad_norm": 1.0231609344482422, |
| "learning_rate": 9.941001291921512e-05, |
| "loss": 0.0775, |
| "step": 580 |
| }, |
| { |
| "grad_norm": 1.0133978128433228, |
| "learning_rate": 9.936705305255612e-05, |
| "loss": 0.083, |
| "step": 590 |
| }, |
| { |
| "grad_norm": 1.1500823497772217, |
| "learning_rate": 9.932259354892984e-05, |
| "loss": 0.0867, |
| "step": 600 |
| }, |
| { |
| "grad_norm": 1.0416063070297241, |
| "learning_rate": 9.927663575889521e-05, |
| "loss": 0.0796, |
| "step": 610 |
| }, |
| { |
| "grad_norm": 1.0557258129119873, |
| "learning_rate": 9.922918107852504e-05, |
| "loss": 0.0826, |
| "step": 620 |
| }, |
| { |
| "grad_norm": 1.2491735219955444, |
| "learning_rate": 9.918023094936363e-05, |
| "loss": 0.0817, |
| "step": 630 |
| }, |
| { |
| "grad_norm": 1.1741544008255005, |
| "learning_rate": 9.912978685838294e-05, |
| "loss": 0.0807, |
| "step": 640 |
| }, |
| { |
| "grad_norm": 1.2721768617630005, |
| "learning_rate": 9.90778503379374e-05, |
| "loss": 0.0754, |
| "step": 650 |
| }, |
| { |
| "grad_norm": 0.9619813561439514, |
| "learning_rate": 9.902442296571743e-05, |
| "loss": 0.0754, |
| "step": 660 |
| }, |
| { |
| "grad_norm": 1.1113922595977783, |
| "learning_rate": 9.896950636470147e-05, |
| "loss": 0.0736, |
| "step": 670 |
| }, |
| { |
| "grad_norm": 0.8889420032501221, |
| "learning_rate": 9.891310220310666e-05, |
| "loss": 0.0774, |
| "step": 680 |
| }, |
| { |
| "grad_norm": 0.8371496200561523, |
| "learning_rate": 9.885521219433823e-05, |
| "loss": 0.0752, |
| "step": 690 |
| }, |
| { |
| "grad_norm": 1.193962812423706, |
| "learning_rate": 9.879583809693738e-05, |
| "loss": 0.0739, |
| "step": 700 |
| }, |
| { |
| "grad_norm": 0.9352642893791199, |
| "learning_rate": 9.873498171452789e-05, |
| "loss": 0.074, |
| "step": 710 |
| }, |
| { |
| "grad_norm": 0.8476113677024841, |
| "learning_rate": 9.867264489576135e-05, |
| "loss": 0.0687, |
| "step": 720 |
| }, |
| { |
| "grad_norm": 0.85703045129776, |
| "learning_rate": 9.860882953426099e-05, |
| "loss": 0.0721, |
| "step": 730 |
| }, |
| { |
| "grad_norm": 1.0026068687438965, |
| "learning_rate": 9.854353756856412e-05, |
| "loss": 0.0632, |
| "step": 740 |
| }, |
| { |
| "grad_norm": 0.8964142799377441, |
| "learning_rate": 9.847677098206332e-05, |
| "loss": 0.0628, |
| "step": 750 |
| }, |
| { |
| "grad_norm": 0.9155259728431702, |
| "learning_rate": 9.840853180294608e-05, |
| "loss": 0.0647, |
| "step": 760 |
| }, |
| { |
| "grad_norm": 0.9439025521278381, |
| "learning_rate": 9.833882210413332e-05, |
| "loss": 0.0699, |
| "step": 770 |
| }, |
| { |
| "grad_norm": 0.9577764272689819, |
| "learning_rate": 9.826764400321633e-05, |
| "loss": 0.0639, |
| "step": 780 |
| }, |
| { |
| "grad_norm": 1.0065102577209473, |
| "learning_rate": 9.819499966239243e-05, |
| "loss": 0.0613, |
| "step": 790 |
| }, |
| { |
| "grad_norm": 0.9528071880340576, |
| "learning_rate": 9.812089128839938e-05, |
| "loss": 0.0643, |
| "step": 800 |
| }, |
| { |
| "grad_norm": 0.8194341659545898, |
| "learning_rate": 9.804532113244828e-05, |
| "loss": 0.0647, |
| "step": 810 |
| }, |
| { |
| "grad_norm": 0.8991771936416626, |
| "learning_rate": 9.796829149015517e-05, |
| "loss": 0.063, |
| "step": 820 |
| }, |
| { |
| "grad_norm": 0.7631344795227051, |
| "learning_rate": 9.788980470147132e-05, |
| "loss": 0.0651, |
| "step": 830 |
| }, |
| { |
| "grad_norm": 0.8909981846809387, |
| "learning_rate": 9.780986315061218e-05, |
| "loss": 0.0594, |
| "step": 840 |
| }, |
| { |
| "grad_norm": 0.8621935248374939, |
| "learning_rate": 9.772846926598491e-05, |
| "loss": 0.0623, |
| "step": 850 |
| }, |
| { |
| "grad_norm": 0.7843512296676636, |
| "learning_rate": 9.76456255201146e-05, |
| "loss": 0.061, |
| "step": 860 |
| }, |
| { |
| "grad_norm": 0.8107233047485352, |
| "learning_rate": 9.756133442956923e-05, |
| "loss": 0.06, |
| "step": 870 |
| }, |
| { |
| "grad_norm": 0.8250086307525635, |
| "learning_rate": 9.747559855488313e-05, |
| "loss": 0.0608, |
| "step": 880 |
| }, |
| { |
| "grad_norm": 1.0516151189804077, |
| "learning_rate": 9.73884205004793e-05, |
| "loss": 0.0587, |
| "step": 890 |
| }, |
| { |
| "grad_norm": 0.7967276573181152, |
| "learning_rate": 9.729980291459019e-05, |
| "loss": 0.0554, |
| "step": 900 |
| }, |
| { |
| "grad_norm": 0.7640511393547058, |
| "learning_rate": 9.720974848917735e-05, |
| "loss": 0.0649, |
| "step": 910 |
| }, |
| { |
| "grad_norm": 0.9864993095397949, |
| "learning_rate": 9.711825995984957e-05, |
| "loss": 0.0597, |
| "step": 920 |
| }, |
| { |
| "grad_norm": 0.8970789313316345, |
| "learning_rate": 9.702534010577991e-05, |
| "loss": 0.0559, |
| "step": 930 |
| }, |
| { |
| "grad_norm": 0.9033194780349731, |
| "learning_rate": 9.693099174962103e-05, |
| "loss": 0.0502, |
| "step": 940 |
| }, |
| { |
| "grad_norm": 0.891749918460846, |
| "learning_rate": 9.683521775741977e-05, |
| "loss": 0.0646, |
| "step": 950 |
| }, |
| { |
| "grad_norm": 0.8868605494499207, |
| "learning_rate": 9.673802103852979e-05, |
| "loss": 0.0592, |
| "step": 960 |
| }, |
| { |
| "grad_norm": 0.9233248829841614, |
| "learning_rate": 9.663940454552342e-05, |
| "loss": 0.0566, |
| "step": 970 |
| }, |
| { |
| "grad_norm": 0.8602434992790222, |
| "learning_rate": 9.65393712741018e-05, |
| "loss": 0.0534, |
| "step": 980 |
| }, |
| { |
| "grad_norm": 0.8748095631599426, |
| "learning_rate": 9.6437924263004e-05, |
| "loss": 0.0543, |
| "step": 990 |
| }, |
| { |
| "grad_norm": 0.7950878143310547, |
| "learning_rate": 9.63350665939146e-05, |
| "loss": 0.0517, |
| "step": 1000 |
| }, |
| { |
| "grad_norm": 0.8243885040283203, |
| "learning_rate": 9.623080139137023e-05, |
| "loss": 0.0516, |
| "step": 1010 |
| }, |
| { |
| "grad_norm": 0.772615373134613, |
| "learning_rate": 9.612513182266447e-05, |
| "loss": 0.053, |
| "step": 1020 |
| }, |
| { |
| "grad_norm": 0.8965969681739807, |
| "learning_rate": 9.601806109775179e-05, |
| "loss": 0.0514, |
| "step": 1030 |
| }, |
| { |
| "grad_norm": 0.8220785856246948, |
| "learning_rate": 9.590959246914995e-05, |
| "loss": 0.0585, |
| "step": 1040 |
| }, |
| { |
| "grad_norm": 0.8071192502975464, |
| "learning_rate": 9.579972923184122e-05, |
| "loss": 0.0528, |
| "step": 1050 |
| }, |
| { |
| "grad_norm": 0.715766429901123, |
| "learning_rate": 9.568847472317232e-05, |
| "loss": 0.0549, |
| "step": 1060 |
| }, |
| { |
| "grad_norm": 0.7731180191040039, |
| "learning_rate": 9.557583232275303e-05, |
| "loss": 0.051, |
| "step": 1070 |
| }, |
| { |
| "grad_norm": 0.7870914936065674, |
| "learning_rate": 9.546180545235344e-05, |
| "loss": 0.0535, |
| "step": 1080 |
| }, |
| { |
| "grad_norm": 0.7550817131996155, |
| "learning_rate": 9.534639757580013e-05, |
| "loss": 0.0556, |
| "step": 1090 |
| }, |
| { |
| "grad_norm": 0.8336426615715027, |
| "learning_rate": 9.522961219887092e-05, |
| "loss": 0.0515, |
| "step": 1100 |
| }, |
| { |
| "grad_norm": 0.7580074071884155, |
| "learning_rate": 9.511145286918828e-05, |
| "loss": 0.0547, |
| "step": 1110 |
| }, |
| { |
| "grad_norm": 0.7820581793785095, |
| "learning_rate": 9.499192317611167e-05, |
| "loss": 0.0546, |
| "step": 1120 |
| }, |
| { |
| "grad_norm": 0.8143420815467834, |
| "learning_rate": 9.487102675062851e-05, |
| "loss": 0.051, |
| "step": 1130 |
| }, |
| { |
| "grad_norm": 0.9172049164772034, |
| "learning_rate": 9.474876726524374e-05, |
| "loss": 0.055, |
| "step": 1140 |
| }, |
| { |
| "grad_norm": 0.6472367644309998, |
| "learning_rate": 9.462514843386845e-05, |
| "loss": 0.0475, |
| "step": 1150 |
| }, |
| { |
| "grad_norm": 0.8475685119628906, |
| "learning_rate": 9.450017401170689e-05, |
| "loss": 0.0481, |
| "step": 1160 |
| }, |
| { |
| "grad_norm": 0.6527271866798401, |
| "learning_rate": 9.437384779514256e-05, |
| "loss": 0.049, |
| "step": 1170 |
| }, |
| { |
| "grad_norm": 0.7633675932884216, |
| "learning_rate": 9.424617362162271e-05, |
| "loss": 0.044, |
| "step": 1180 |
| }, |
| { |
| "grad_norm": 0.6868181824684143, |
| "learning_rate": 9.411715536954196e-05, |
| "loss": 0.0493, |
| "step": 1190 |
| }, |
| { |
| "grad_norm": 0.7303510904312134, |
| "learning_rate": 9.39867969581243e-05, |
| "loss": 0.0526, |
| "step": 1200 |
| }, |
| { |
| "grad_norm": 0.7577109336853027, |
| "learning_rate": 9.385510234730415e-05, |
| "loss": 0.0549, |
| "step": 1210 |
| }, |
| { |
| "grad_norm": 0.671566903591156, |
| "learning_rate": 9.372207553760603e-05, |
| "loss": 0.0492, |
| "step": 1220 |
| }, |
| { |
| "grad_norm": 0.6442204713821411, |
| "learning_rate": 9.358772057002312e-05, |
| "loss": 0.0489, |
| "step": 1230 |
| }, |
| { |
| "grad_norm": 0.8377021551132202, |
| "learning_rate": 9.345204152589428e-05, |
| "loss": 0.0462, |
| "step": 1240 |
| }, |
| { |
| "grad_norm": 0.7773048281669617, |
| "learning_rate": 9.331504252678037e-05, |
| "loss": 0.0519, |
| "step": 1250 |
| }, |
| { |
| "grad_norm": 0.8093121647834778, |
| "learning_rate": 9.317672773433876e-05, |
| "loss": 0.0497, |
| "step": 1260 |
| }, |
| { |
| "grad_norm": 0.8040132522583008, |
| "learning_rate": 9.30371013501972e-05, |
| "loss": 0.0512, |
| "step": 1270 |
| }, |
| { |
| "grad_norm": 0.7714861035346985, |
| "learning_rate": 9.289616761582587e-05, |
| "loss": 0.0496, |
| "step": 1280 |
| }, |
| { |
| "grad_norm": 0.8005548715591431, |
| "learning_rate": 9.275393081240882e-05, |
| "loss": 0.0469, |
| "step": 1290 |
| }, |
| { |
| "grad_norm": 0.6162385940551758, |
| "learning_rate": 9.261039526071374e-05, |
| "loss": 0.0455, |
| "step": 1300 |
| }, |
| { |
| "grad_norm": 0.8203111290931702, |
| "learning_rate": 9.246556532096078e-05, |
| "loss": 0.0501, |
| "step": 1310 |
| }, |
| { |
| "grad_norm": 0.7906405329704285, |
| "learning_rate": 9.231944539269009e-05, |
| "loss": 0.0511, |
| "step": 1320 |
| }, |
| { |
| "grad_norm": 0.6650158166885376, |
| "learning_rate": 9.217203991462815e-05, |
| "loss": 0.0449, |
| "step": 1330 |
| }, |
| { |
| "grad_norm": 0.681294858455658, |
| "learning_rate": 9.202335336455296e-05, |
| "loss": 0.0497, |
| "step": 1340 |
| }, |
| { |
| "grad_norm": 0.7014471888542175, |
| "learning_rate": 9.187339025915802e-05, |
| "loss": 0.0454, |
| "step": 1350 |
| }, |
| { |
| "grad_norm": 0.768068790435791, |
| "learning_rate": 9.17221551539151e-05, |
| "loss": 0.048, |
| "step": 1360 |
| }, |
| { |
| "grad_norm": 0.6616342663764954, |
| "learning_rate": 9.156965264293586e-05, |
| "loss": 0.0446, |
| "step": 1370 |
| }, |
| { |
| "grad_norm": 0.8056400418281555, |
| "learning_rate": 9.141588735883232e-05, |
| "loss": 0.0496, |
| "step": 1380 |
| }, |
| { |
| "grad_norm": 0.7025634050369263, |
| "learning_rate": 9.126086397257612e-05, |
| "loss": 0.0477, |
| "step": 1390 |
| }, |
| { |
| "grad_norm": 0.7773793935775757, |
| "learning_rate": 9.110458719335659e-05, |
| "loss": 0.0478, |
| "step": 1400 |
| }, |
| { |
| "grad_norm": 0.696651041507721, |
| "learning_rate": 9.094706176843777e-05, |
| "loss": 0.0472, |
| "step": 1410 |
| }, |
| { |
| "grad_norm": 0.6660913228988647, |
| "learning_rate": 9.078829248301417e-05, |
| "loss": 0.0465, |
| "step": 1420 |
| }, |
| { |
| "grad_norm": 0.7574058771133423, |
| "learning_rate": 9.062828416006539e-05, |
| "loss": 0.0443, |
| "step": 1430 |
| }, |
| { |
| "grad_norm": 0.6740890145301819, |
| "learning_rate": 9.046704166020961e-05, |
| "loss": 0.0472, |
| "step": 1440 |
| }, |
| { |
| "grad_norm": 0.6242374181747437, |
| "learning_rate": 9.030456988155596e-05, |
| "loss": 0.0477, |
| "step": 1450 |
| }, |
| { |
| "grad_norm": 0.6770017147064209, |
| "learning_rate": 9.014087375955573e-05, |
| "loss": 0.0469, |
| "step": 1460 |
| }, |
| { |
| "grad_norm": 0.6208916306495667, |
| "learning_rate": 8.997595826685243e-05, |
| "loss": 0.0451, |
| "step": 1470 |
| }, |
| { |
| "grad_norm": 0.6913909316062927, |
| "learning_rate": 8.980982841313074e-05, |
| "loss": 0.0432, |
| "step": 1480 |
| }, |
| { |
| "grad_norm": 0.67692631483078, |
| "learning_rate": 8.964248924496435e-05, |
| "loss": 0.0475, |
| "step": 1490 |
| }, |
| { |
| "grad_norm": 0.6939643621444702, |
| "learning_rate": 8.947394584566258e-05, |
| "loss": 0.0395, |
| "step": 1500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 6000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 90, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|