{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4875076173065204,
  "eval_steps": 500,
  "global_step": 1200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004062563477554337,
      "grad_norm": 1.0028599500656128,
      "learning_rate": 9.91876523151909e-05,
      "loss": 2.047,
      "step": 10
    },
    {
      "epoch": 0.008125126955108674,
      "grad_norm": 0.8161811232566833,
      "learning_rate": 9.83753046303818e-05,
      "loss": 1.9702,
      "step": 20
    },
    {
      "epoch": 0.01218769043266301,
      "grad_norm": 0.6260484457015991,
      "learning_rate": 9.756295694557271e-05,
      "loss": 1.6992,
      "step": 30
    },
    {
      "epoch": 0.01625025391021735,
      "grad_norm": 0.8630143404006958,
      "learning_rate": 9.675060926076362e-05,
      "loss": 1.6099,
      "step": 40
    },
    {
      "epoch": 0.020312817387771683,
      "grad_norm": 2.486990213394165,
      "learning_rate": 9.593826157595452e-05,
      "loss": 1.6885,
      "step": 50
    },
    {
      "epoch": 0.02437538086532602,
      "grad_norm": 1.343361258506775,
      "learning_rate": 9.512591389114541e-05,
      "loss": 1.2864,
      "step": 60
    },
    {
      "epoch": 0.028437944342880356,
      "grad_norm": 2.4562668800354004,
      "learning_rate": 9.431356620633631e-05,
      "loss": 1.4512,
      "step": 70
    },
    {
      "epoch": 0.0325005078204347,
      "grad_norm": 1.331054925918579,
      "learning_rate": 9.350121852152722e-05,
      "loss": 1.4368,
      "step": 80
    },
    {
      "epoch": 0.03656307129798903,
      "grad_norm": 3.2478368282318115,
      "learning_rate": 9.268887083671812e-05,
      "loss": 1.4787,
      "step": 90
    },
    {
      "epoch": 0.040625634775543366,
      "grad_norm": 1.1060608625411987,
      "learning_rate": 9.187652315190903e-05,
      "loss": 1.3987,
      "step": 100
    },
    {
      "epoch": 0.04468819825309771,
      "grad_norm": 1.6176650524139404,
      "learning_rate": 9.106417546709992e-05,
      "loss": 1.5459,
      "step": 110
    },
    {
      "epoch": 0.04875076173065204,
      "grad_norm": 0.7203599810600281,
      "learning_rate": 9.025182778229082e-05,
      "loss": 1.3632,
      "step": 120
    },
    {
      "epoch": 0.05281332520820638,
      "grad_norm": 0.7429249882698059,
      "learning_rate": 8.943948009748173e-05,
      "loss": 1.4722,
      "step": 130
    },
    {
      "epoch": 0.05687588868576071,
      "grad_norm": 0.5126988291740417,
      "learning_rate": 8.862713241267263e-05,
      "loss": 1.4225,
      "step": 140
    },
    {
      "epoch": 0.06093845216331505,
      "grad_norm": 0.8644631505012512,
      "learning_rate": 8.781478472786352e-05,
      "loss": 1.3458,
      "step": 150
    },
    {
      "epoch": 0.0650010156408694,
      "grad_norm": 1.2231146097183228,
      "learning_rate": 8.708367181153534e-05,
      "loss": 1.453,
      "step": 160
    },
    {
      "epoch": 0.06906357911842373,
      "grad_norm": 0.8207014203071594,
      "learning_rate": 8.627132412672623e-05,
      "loss": 1.3441,
      "step": 170
    },
    {
      "epoch": 0.07312614259597806,
      "grad_norm": 0.5412943959236145,
      "learning_rate": 8.545897644191714e-05,
      "loss": 1.5198,
      "step": 180
    },
    {
      "epoch": 0.0771887060735324,
      "grad_norm": 0.8348711133003235,
      "learning_rate": 8.464662875710805e-05,
      "loss": 1.427,
      "step": 190
    },
    {
      "epoch": 0.08125126955108673,
      "grad_norm": 3.033511161804199,
      "learning_rate": 8.383428107229895e-05,
      "loss": 1.3741,
      "step": 200
    },
    {
      "epoch": 0.08531383302864107,
      "grad_norm": NaN,
      "learning_rate": 8.310316815597076e-05,
      "loss": 1.4307,
      "step": 210
    },
    {
      "epoch": 0.08937639650619542,
      "grad_norm": 3.1870012283325195,
      "learning_rate": 8.229082047116166e-05,
      "loss": 1.4516,
      "step": 220
    },
    {
      "epoch": 0.09343895998374975,
      "grad_norm": 0.9713374972343445,
      "learning_rate": 8.147847278635257e-05,
      "loss": 1.3744,
      "step": 230
    },
    {
      "epoch": 0.09750152346130408,
      "grad_norm": 1.4386881589889526,
      "learning_rate": 8.066612510154347e-05,
      "loss": 1.3724,
      "step": 240
    },
    {
      "epoch": 0.10156408693885842,
      "grad_norm": 0.786233127117157,
      "learning_rate": 7.985377741673436e-05,
      "loss": 1.3245,
      "step": 250
    },
    {
      "epoch": 0.10562665041641275,
      "grad_norm": 0.8859283924102783,
      "learning_rate": 7.904142973192526e-05,
      "loss": 1.4272,
      "step": 260
    },
    {
      "epoch": 0.10968921389396709,
      "grad_norm": 0.8447876572608948,
      "learning_rate": 7.822908204711617e-05,
      "loss": 1.2975,
      "step": 270
    },
    {
      "epoch": 0.11375177737152142,
      "grad_norm": 1.0458195209503174,
      "learning_rate": 7.741673436230708e-05,
      "loss": 1.4476,
      "step": 280
    },
    {
      "epoch": 0.11781434084907577,
      "grad_norm": 0.719196617603302,
      "learning_rate": 7.660438667749798e-05,
      "loss": 1.372,
      "step": 290
    },
    {
      "epoch": 0.1218769043266301,
      "grad_norm": 0.9848544597625732,
      "learning_rate": 7.579203899268887e-05,
      "loss": 1.4798,
      "step": 300
    },
    {
      "epoch": 0.12593946780418444,
      "grad_norm": 1.8818702697753906,
      "learning_rate": 7.497969130787977e-05,
      "loss": 1.4432,
      "step": 310
    },
    {
      "epoch": 0.1300020312817388,
      "grad_norm": 2.130098342895508,
      "learning_rate": 7.416734362307067e-05,
      "loss": 1.6161,
      "step": 320
    },
    {
      "epoch": 0.1340645947592931,
      "grad_norm": 0.8697317838668823,
      "learning_rate": 7.335499593826158e-05,
      "loss": 1.2933,
      "step": 330
    },
    {
      "epoch": 0.13812715823684746,
      "grad_norm": 0.7327048182487488,
      "learning_rate": 7.254264825345249e-05,
      "loss": 1.4793,
      "step": 340
    },
    {
      "epoch": 0.14218972171440178,
      "grad_norm": 3.7592852115631104,
      "learning_rate": 7.173030056864339e-05,
      "loss": 1.4486,
      "step": 350
    },
    {
      "epoch": 0.14625228519195613,
      "grad_norm": 1.0223255157470703,
      "learning_rate": 7.091795288383428e-05,
      "loss": 1.3823,
      "step": 360
    },
    {
      "epoch": 0.15031484866951045,
      "grad_norm": 1.6005347967147827,
      "learning_rate": 7.010560519902518e-05,
      "loss": 1.2388,
      "step": 370
    },
    {
      "epoch": 0.1543774121470648,
      "grad_norm": 0.7161141633987427,
      "learning_rate": 6.929325751421609e-05,
      "loss": 1.3212,
      "step": 380
    },
    {
      "epoch": 0.15843997562461914,
      "grad_norm": 1.3722366094589233,
      "learning_rate": 6.848090982940699e-05,
      "loss": 1.4035,
      "step": 390
    },
    {
      "epoch": 0.16250253910217347,
      "grad_norm": 1.0379196405410767,
      "learning_rate": 6.76685621445979e-05,
      "loss": 1.387,
      "step": 400
    },
    {
      "epoch": 0.16656510257972781,
      "grad_norm": 1.263466238975525,
      "learning_rate": 6.685621445978879e-05,
      "loss": 1.3714,
      "step": 410
    },
    {
      "epoch": 0.17062766605728213,
      "grad_norm": 0.9660252332687378,
      "learning_rate": 6.604386677497969e-05,
      "loss": 1.232,
      "step": 420
    },
    {
      "epoch": 0.17469022953483648,
      "grad_norm": 1.0394254922866821,
      "learning_rate": 6.52315190901706e-05,
      "loss": 1.5308,
      "step": 430
    },
    {
      "epoch": 0.17875279301239083,
      "grad_norm": 2.128751277923584,
      "learning_rate": 6.44191714053615e-05,
      "loss": 1.5051,
      "step": 440
    },
    {
      "epoch": 0.18281535648994515,
      "grad_norm": 1.6652791500091553,
      "learning_rate": 6.36068237205524e-05,
      "loss": 1.2875,
      "step": 450
    },
    {
      "epoch": 0.1868779199674995,
      "grad_norm": 5.205749988555908,
      "learning_rate": 6.27944760357433e-05,
      "loss": 1.4601,
      "step": 460
    },
    {
      "epoch": 0.19094048344505382,
      "grad_norm": 0.5795860886573792,
      "learning_rate": 6.19821283509342e-05,
      "loss": 1.3664,
      "step": 470
    },
    {
      "epoch": 0.19500304692260817,
      "grad_norm": 3.2349355220794678,
      "learning_rate": 6.116978066612511e-05,
      "loss": 1.393,
      "step": 480
    },
    {
      "epoch": 0.1990656104001625,
      "grad_norm": 1.8756282329559326,
      "learning_rate": 6.035743298131601e-05,
      "loss": 1.3348,
      "step": 490
    },
    {
      "epoch": 0.20312817387771684,
      "grad_norm": 0.7906926870346069,
      "learning_rate": 5.954508529650691e-05,
      "loss": 1.2582,
      "step": 500
    },
    {
      "epoch": 0.2071907373552712,
      "grad_norm": 1.1552308797836304,
      "learning_rate": 5.873273761169781e-05,
      "loss": 1.4132,
      "step": 510
    },
    {
      "epoch": 0.2112533008328255,
      "grad_norm": 1.942295789718628,
      "learning_rate": 5.7920389926888705e-05,
      "loss": 1.3779,
      "step": 520
    },
    {
      "epoch": 0.21531586431037986,
      "grad_norm": 0.5551020503044128,
      "learning_rate": 5.7108042242079615e-05,
      "loss": 1.285,
      "step": 530
    },
    {
      "epoch": 0.21937842778793418,
      "grad_norm": 1.6268082857131958,
      "learning_rate": 5.629569455727052e-05,
      "loss": 1.3329,
      "step": 540
    },
    {
      "epoch": 0.22344099126548853,
      "grad_norm": 1.1020852327346802,
      "learning_rate": 5.5483346872461415e-05,
      "loss": 1.4504,
      "step": 550
    },
    {
      "epoch": 0.22750355474304285,
      "grad_norm": 1.139066219329834,
      "learning_rate": 5.467099918765232e-05,
      "loss": 1.433,
      "step": 560
    },
    {
      "epoch": 0.2315661182205972,
      "grad_norm": 0.9007591009140015,
      "learning_rate": 5.3858651502843216e-05,
      "loss": 1.3293,
      "step": 570
    },
    {
      "epoch": 0.23562868169815154,
      "grad_norm": 1.9706934690475464,
      "learning_rate": 5.3046303818034126e-05,
      "loss": 1.4012,
      "step": 580
    },
    {
      "epoch": 0.23969124517570586,
      "grad_norm": 1.2953295707702637,
      "learning_rate": 5.223395613322502e-05,
      "loss": 1.3636,
      "step": 590
    },
    {
      "epoch": 0.2437538086532602,
      "grad_norm": 0.47138330340385437,
      "learning_rate": 5.1421608448415926e-05,
      "loss": 1.5204,
      "step": 600
    },
    {
      "epoch": 0.24781637213081453,
      "grad_norm": 0.7265433669090271,
      "learning_rate": 5.060926076360682e-05,
      "loss": 1.4484,
      "step": 610
    },
    {
      "epoch": 0.2518789356083689,
      "grad_norm": 0.7657461762428284,
      "learning_rate": 4.979691307879773e-05,
      "loss": 1.385,
      "step": 620
    },
    {
      "epoch": 0.25594149908592323,
      "grad_norm": 1.854238510131836,
      "learning_rate": 4.898456539398863e-05,
      "loss": 1.3579,
      "step": 630
    },
    {
      "epoch": 0.2600040625634776,
      "grad_norm": 1.6956483125686646,
      "learning_rate": 4.817221770917953e-05,
      "loss": 1.5143,
      "step": 640
    },
    {
      "epoch": 0.26406662604103187,
      "grad_norm": 0.7038300037384033,
      "learning_rate": 4.735987002437044e-05,
      "loss": 1.4401,
      "step": 650
    },
    {
      "epoch": 0.2681291895185862,
      "grad_norm": 2.2135257720947266,
      "learning_rate": 4.6547522339561334e-05,
      "loss": 1.4839,
      "step": 660
    },
    {
      "epoch": 0.27219175299614057,
      "grad_norm": 0.7248165011405945,
      "learning_rate": 4.573517465475224e-05,
      "loss": 1.2597,
      "step": 670
    },
    {
      "epoch": 0.2762543164736949,
      "grad_norm": 1.5216903686523438,
      "learning_rate": 4.492282696994314e-05,
      "loss": 1.3551,
      "step": 680
    },
    {
      "epoch": 0.28031687995124926,
      "grad_norm": 0.6161156892776489,
      "learning_rate": 4.411047928513404e-05,
      "loss": 1.4665,
      "step": 690
    },
    {
      "epoch": 0.28437944342880356,
      "grad_norm": 0.6121994853019714,
      "learning_rate": 4.329813160032494e-05,
      "loss": 1.4145,
      "step": 700
    },
    {
      "epoch": 0.2884420069063579,
      "grad_norm": 1.0767112970352173,
      "learning_rate": 4.2485783915515845e-05,
      "loss": 1.5545,
      "step": 710
    },
    {
      "epoch": 0.29250457038391225,
      "grad_norm": 0.6764030456542969,
      "learning_rate": 4.167343623070675e-05,
      "loss": 1.3465,
      "step": 720
    },
    {
      "epoch": 0.2965671338614666,
      "grad_norm": 0.7551003694534302,
      "learning_rate": 4.0861088545897645e-05,
      "loss": 1.536,
      "step": 730
    },
    {
      "epoch": 0.3006296973390209,
      "grad_norm": 1.1800507307052612,
      "learning_rate": 4.004874086108855e-05,
      "loss": 1.5582,
      "step": 740
    },
    {
      "epoch": 0.30469226081657524,
      "grad_norm": 1.1166415214538574,
      "learning_rate": 3.923639317627945e-05,
      "loss": 1.415,
      "step": 750
    },
    {
      "epoch": 0.3087548242941296,
      "grad_norm": 0.7444128394126892,
      "learning_rate": 3.842404549147035e-05,
      "loss": 1.2128,
      "step": 760
    },
    {
      "epoch": 0.31281738777168394,
      "grad_norm": 1.2892024517059326,
      "learning_rate": 3.761169780666125e-05,
      "loss": 1.4059,
      "step": 770
    },
    {
      "epoch": 0.3168799512492383,
      "grad_norm": 1.2301298379898071,
      "learning_rate": 3.6799350121852156e-05,
      "loss": 1.4935,
      "step": 780
    },
    {
      "epoch": 0.3209425147267926,
      "grad_norm": 1.2683095932006836,
      "learning_rate": 3.598700243704305e-05,
      "loss": 1.2894,
      "step": 790
    },
    {
      "epoch": 0.32500507820434693,
      "grad_norm": 1.846632957458496,
      "learning_rate": 3.517465475223396e-05,
      "loss": 1.3461,
      "step": 800
    },
    {
      "epoch": 0.3290676416819013,
      "grad_norm": 0.7678124904632568,
      "learning_rate": 3.436230706742486e-05,
      "loss": 1.3318,
      "step": 810
    },
    {
      "epoch": 0.33313020515945563,
      "grad_norm": 0.7512286305427551,
      "learning_rate": 3.3549959382615757e-05,
      "loss": 1.2822,
      "step": 820
    },
    {
      "epoch": 0.33719276863701,
      "grad_norm": 0.5251843929290771,
      "learning_rate": 3.273761169780667e-05,
      "loss": 1.4101,
      "step": 830
    },
    {
      "epoch": 0.34125533211456427,
      "grad_norm": 0.6997140645980835,
      "learning_rate": 3.1925264012997564e-05,
      "loss": 1.1655,
      "step": 840
    },
    {
      "epoch": 0.3453178955921186,
      "grad_norm": 0.742548406124115,
      "learning_rate": 3.111291632818847e-05,
      "loss": 1.4559,
      "step": 850
    },
    {
      "epoch": 0.34938045906967297,
      "grad_norm": 0.7354167103767395,
      "learning_rate": 3.0300568643379367e-05,
      "loss": 1.2944,
      "step": 860
    },
    {
      "epoch": 0.3534430225472273,
      "grad_norm": 0.9748113751411438,
      "learning_rate": 2.9488220958570267e-05,
      "loss": 1.3814,
      "step": 870
    },
    {
      "epoch": 0.35750558602478166,
      "grad_norm": 1.8487335443496704,
      "learning_rate": 2.867587327376117e-05,
      "loss": 1.3519,
      "step": 880
    },
    {
      "epoch": 0.36156814950233596,
      "grad_norm": 1.0059301853179932,
      "learning_rate": 2.786352558895207e-05,
      "loss": 1.4229,
      "step": 890
    },
    {
      "epoch": 0.3656307129798903,
      "grad_norm": 2.511502981185913,
      "learning_rate": 2.7051177904142978e-05,
      "loss": 1.481,
      "step": 900
    },
    {
      "epoch": 0.36969327645744465,
      "grad_norm": 1.8435344696044922,
      "learning_rate": 2.6238830219333875e-05,
      "loss": 1.4692,
      "step": 910
    },
    {
      "epoch": 0.373755839934999,
      "grad_norm": 0.7523055672645569,
      "learning_rate": 2.5426482534524775e-05,
      "loss": 1.3803,
      "step": 920
    },
    {
      "epoch": 0.3778184034125533,
      "grad_norm": 1.177477240562439,
      "learning_rate": 2.461413484971568e-05,
      "loss": 1.347,
      "step": 930
    },
    {
      "epoch": 0.38188096689010764,
      "grad_norm": 0.6719797253608704,
      "learning_rate": 2.3801787164906582e-05,
      "loss": 1.3816,
      "step": 940
    },
    {
      "epoch": 0.385943530367662,
      "grad_norm": 1.7032560110092163,
      "learning_rate": 2.2989439480097482e-05,
      "loss": 1.4521,
      "step": 950
    },
    {
      "epoch": 0.39000609384521634,
      "grad_norm": 1.8486378192901611,
      "learning_rate": 2.2177091795288386e-05,
      "loss": 1.2878,
      "step": 960
    },
    {
      "epoch": 0.3940686573227707,
      "grad_norm": 2.398749351501465,
      "learning_rate": 2.1364744110479286e-05,
      "loss": 1.5039,
      "step": 970
    },
    {
      "epoch": 0.398131220800325,
      "grad_norm": 1.3361066579818726,
      "learning_rate": 2.0552396425670186e-05,
      "loss": 1.3444,
      "step": 980
    },
    {
      "epoch": 0.40219378427787933,
      "grad_norm": 0.6444634795188904,
      "learning_rate": 1.974004874086109e-05,
      "loss": 1.3908,
      "step": 990
    },
    {
      "epoch": 0.4062563477554337,
      "grad_norm": 0.8480059504508972,
      "learning_rate": 1.892770105605199e-05,
      "loss": 1.3807,
      "step": 1000
    },
    {
      "epoch": 0.410318911232988,
      "grad_norm": 0.7580183744430542,
      "learning_rate": 1.8115353371242893e-05,
      "loss": 1.299,
      "step": 1010
    },
    {
      "epoch": 0.4143814747105424,
      "grad_norm": 1.2310864925384521,
      "learning_rate": 1.7303005686433797e-05,
      "loss": 1.2885,
      "step": 1020
    },
    {
      "epoch": 0.41844403818809667,
      "grad_norm": 1.3021286725997925,
      "learning_rate": 1.6490658001624697e-05,
      "loss": 1.3035,
      "step": 1030
    },
    {
      "epoch": 0.422506601665651,
      "grad_norm": 1.0007705688476562,
      "learning_rate": 1.5678310316815597e-05,
      "loss": 1.3275,
      "step": 1040
    },
    {
      "epoch": 0.42656916514320536,
      "grad_norm": 0.7786781191825867,
      "learning_rate": 1.48659626320065e-05,
      "loss": 1.2978,
      "step": 1050
    },
    {
      "epoch": 0.4306317286207597,
      "grad_norm": 0.9036843180656433,
      "learning_rate": 1.4053614947197402e-05,
      "loss": 1.3758,
      "step": 1060
    },
    {
      "epoch": 0.43469429209831406,
      "grad_norm": 0.7931985259056091,
      "learning_rate": 1.3241267262388301e-05,
      "loss": 1.4206,
      "step": 1070
    },
    {
      "epoch": 0.43875685557586835,
      "grad_norm": 0.951457679271698,
      "learning_rate": 1.2428919577579204e-05,
      "loss": 1.3873,
      "step": 1080
    },
    {
      "epoch": 0.4428194190534227,
      "grad_norm": 1.5938663482666016,
      "learning_rate": 1.1616571892770106e-05,
      "loss": 1.2449,
      "step": 1090
    },
    {
      "epoch": 0.44688198253097705,
      "grad_norm": 0.6677514314651489,
      "learning_rate": 1.0804224207961008e-05,
      "loss": 1.3754,
      "step": 1100
    },
    {
      "epoch": 0.4509445460085314,
      "grad_norm": 2.0201685428619385,
      "learning_rate": 9.99187652315191e-06,
      "loss": 1.4208,
      "step": 1110
    },
    {
      "epoch": 0.4550071094860857,
      "grad_norm": 1.0941497087478638,
      "learning_rate": 9.179528838342812e-06,
      "loss": 1.3314,
      "step": 1120
    },
    {
      "epoch": 0.45906967296364004,
      "grad_norm": 0.7986588478088379,
      "learning_rate": 8.367181153533712e-06,
      "loss": 1.3455,
      "step": 1130
    },
    {
      "epoch": 0.4631322364411944,
      "grad_norm": 0.594987690448761,
      "learning_rate": 7.554833468724615e-06,
      "loss": 1.2496,
      "step": 1140
    },
    {
      "epoch": 0.46719479991874874,
      "grad_norm": 0.8196585774421692,
      "learning_rate": 6.742485783915516e-06,
      "loss": 1.4404,
      "step": 1150
    },
    {
      "epoch": 0.4712573633963031,
      "grad_norm": 0.6591458916664124,
      "learning_rate": 5.930138099106418e-06,
      "loss": 1.2863,
      "step": 1160
    },
    {
      "epoch": 0.4753199268738574,
      "grad_norm": 1.8869117498397827,
      "learning_rate": 5.117790414297319e-06,
      "loss": 1.4505,
      "step": 1170
    },
    {
      "epoch": 0.4793824903514117,
      "grad_norm": 1.0533993244171143,
      "learning_rate": 4.305442729488221e-06,
      "loss": 1.4071,
      "step": 1180
    },
    {
      "epoch": 0.4834450538289661,
      "grad_norm": 0.95952308177948,
      "learning_rate": 3.4930950446791225e-06,
      "loss": 1.3619,
      "step": 1190
    },
    {
      "epoch": 0.4875076173065204,
      "grad_norm": 2.069716691970825,
      "learning_rate": 2.6807473598700244e-06,
      "loss": 1.4363,
      "step": 1200
    }
  ],
  "logging_steps": 10,
  "max_steps": 1231,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.383385539871539e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}