{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 32.68030548095703, "learning_rate": 8.000000000000001e-06, "logits/chosen": -1.6396703720092773, "logits/rejected": -4.051800727844238, "logps/chosen": -991.9453125, "logps/rejected": -804.3565673828125, "loss": 0.6639, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.03835174813866615, "rewards/margins": 0.06670190393924713, "rewards/rejected": -0.028350140899419785, "step": 5 }, { "epoch": 0.04, "grad_norm": 15.304901123046875, "learning_rate": 1.8e-05, "logits/chosen": -1.3240370750427246, "logits/rejected": -3.8731656074523926, "logps/chosen": -1051.1619873046875, "logps/rejected": -844.4005126953125, "loss": 0.4827, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.36078932881355286, "rewards/margins": 0.5343167185783386, "rewards/rejected": -0.17352741956710815, "step": 10 }, { "epoch": 0.06, "grad_norm": 0.7237646579742432, "learning_rate": 2.8000000000000003e-05, "logits/chosen": -1.3096798658370972, "logits/rejected": -3.6570611000061035, "logps/chosen": -1035.888427734375, "logps/rejected": -842.5685424804688, "loss": 0.0832, "rewards/accuracies": 1.0, "rewards/chosen": 2.0725021362304688, "rewards/margins": 3.071218729019165, "rewards/rejected": -0.9987167119979858, "step": 15 }, { "epoch": 0.08, "grad_norm": 0.0076161520555615425, "learning_rate": 3.8e-05, "logits/chosen": -0.8284958004951477, "logits/rejected": -2.9240927696228027, "logps/chosen": -870.0358276367188, "logps/rejected": -1002.2626953125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 5.352972507476807, "rewards/margins": 8.709424018859863, "rewards/rejected": -3.3564505577087402, "step": 20 }, { "epoch": 0.1, "grad_norm": 0.004960057325661182, "learning_rate": 4.8e-05, "logits/chosen": -0.6791419982910156, "logits/rejected": NaN, "logps/chosen": -874.9759521484375, "logps/rejected": -876.6950073242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 10.639605522155762, "rewards/margins": 17.804119110107422, "rewards/rejected": -7.164514064788818, "step": 25 }, { "epoch": 0.12, "grad_norm": 1.3971771295473445e-07, "learning_rate": 4.999125183044924e-05, "logits/chosen": -0.34096163511276245, "logits/rejected": NaN, "logps/chosen": -939.2772216796875, "logps/rejected": -893.369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 14.58117961883545, "rewards/margins": 25.044315338134766, "rewards/rejected": -10.463134765625, "step": 30 }, { "epoch": 0.14, "grad_norm": 1.2492258338880902e-09, "learning_rate": 4.9955722884434114e-05, "logits/chosen": -0.140629380941391, "logits/rejected": -1.5191800594329834, "logps/chosen": -808.206298828125, "logps/rejected": -1101.503662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 15.305819511413574, "rewards/margins": 30.041492462158203, "rewards/rejected": -14.735674858093262, "step": 35 }, { "epoch": 0.16, "grad_norm": 8.451875288528754e-08, "learning_rate": 4.98929052218411e-05, "logits/chosen": -0.08291908353567123, "logits/rejected": -1.5926605463027954, "logps/chosen": -799.0426025390625, "logps/rejected": -941.26025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 16.93221664428711, "rewards/margins": 32.224510192871094, "rewards/rejected": -15.292287826538086, "step": 40 }, { "epoch": 0.18, "grad_norm": 6.276909547686671e-11, "learning_rate": 4.980286753286195e-05, "logits/chosen": -0.07936549186706543, "logits/rejected": -1.6235363483428955, "logps/chosen": -739.1414794921875, "logps/rejected": -916.921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.33672523498535, "rewards/margins": 34.177513122558594, "rewards/rejected": -15.840789794921875, "step": 45 }, { "epoch": 0.2, "grad_norm": 6.576159755633526e-11, "learning_rate": 4.968570827238764e-05, "logits/chosen": -0.01292533241212368, "logits/rejected": -1.3671391010284424, "logps/chosen": -804.4974365234375, "logps/rejected": -1069.5924072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.07866668701172, "rewards/margins": 38.214569091796875, "rewards/rejected": -20.13589859008789, "step": 50 }, { "epoch": 0.22, "grad_norm": 9.919030007077279e-12, "learning_rate": 4.95415555523494e-05, "logits/chosen": 0.1709265410900116, "logits/rejected": -1.2384920120239258, "logps/chosen": -947.8603515625, "logps/rejected": -1064.900146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 19.031835556030273, "rewards/margins": 38.730384826660156, "rewards/rejected": -19.698551177978516, "step": 55 }, { "epoch": 0.24, "grad_norm": 8.758817560128307e-10, "learning_rate": 4.937056700163015e-05, "logits/chosen": 0.06906407326459885, "logits/rejected": -1.3715310096740723, "logps/chosen": -822.6943359375, "logps/rejected": -1030.0531005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 19.58559799194336, "rewards/margins": 39.85332107543945, "rewards/rejected": -20.267719268798828, "step": 60 }, { "epoch": 0.26, "grad_norm": 7.939841546820287e-14, "learning_rate": 4.917292959369968e-05, "logits/chosen": 0.12646104395389557, "logits/rejected": -1.2485965490341187, "logps/chosen": -848.3450927734375, "logps/rejected": -1009.3894653320312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.458877563476562, "rewards/margins": 38.36582946777344, "rewards/rejected": -19.906951904296875, "step": 65 }, { "epoch": 0.28, "grad_norm": 5.7889120164134056e-09, "learning_rate": 4.8948859442161874e-05, "logits/chosen": -0.05368973687291145, "logits/rejected": -1.1779694557189941, "logps/chosen": -843.9136962890625, "logps/rejected": -1093.84765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.823673248291016, "rewards/margins": 40.01471710205078, "rewards/rejected": -21.1910457611084, "step": 70 }, { "epoch": 0.3, "grad_norm": 2.912129630321114e-13, "learning_rate": 4.8698601564437675e-05, "logits/chosen": 0.21445438265800476, "logits/rejected": -1.3090651035308838, "logps/chosen": -829.6139526367188, "logps/rejected": -967.4557495117188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.882862091064453, "rewards/margins": 38.040679931640625, "rewards/rejected": -19.157821655273438, "step": 75 }, { "epoch": 0.32, "grad_norm": 4.708504026669329e-14, "learning_rate": 4.842242961384211e-05, "logits/chosen": 0.24517253041267395, "logits/rejected": -1.157088041305542, "logps/chosen": -695.1763305664062, "logps/rejected": -1113.8963623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.196081161499023, "rewards/margins": 39.54194259643555, "rewards/rejected": -21.34585952758789, "step": 80 }, { "epoch": 0.34, "grad_norm": 2.941465566128412e-14, "learning_rate": 4.812064558034847e-05, "logits/chosen": 0.2809743285179138, "logits/rejected": NaN, "logps/chosen": -787.4198608398438, "logps/rejected": -980.9554443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 17.84877586364746, "rewards/margins": 38.223331451416016, "rewards/rejected": -20.374557495117188, "step": 85 }, { "epoch": 0.36, "grad_norm": 3.4191766171837168e-12, "learning_rate": 4.779357946036661e-05, "logits/chosen": 0.09962861239910126, "logits/rejected": NaN, "logps/chosen": -801.5222778320312, "logps/rejected": -1071.500732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 17.405176162719727, "rewards/margins": 38.406497955322266, "rewards/rejected": -21.00132179260254, "step": 90 }, { "epoch": 0.38, "grad_norm": 4.459133756684608e-11, "learning_rate": 4.7441588895896805e-05, "logits/chosen": 0.2806223928928375, "logits/rejected": -1.0863935947418213, "logps/chosen": -767.6072998046875, "logps/rejected": -1064.6678466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.554723739624023, "rewards/margins": 37.88628387451172, "rewards/rejected": -19.33156394958496, "step": 95 }, { "epoch": 0.4, "grad_norm": 1.7513205783584868e-13, "learning_rate": 4.7065058783453424e-05, "logits/chosen": 0.056603264063596725, "logits/rejected": -1.2640782594680786, "logps/chosen": -806.6994018554688, "logps/rejected": -1040.3111572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 19.12691879272461, "rewards/margins": 39.593467712402344, "rewards/rejected": -20.466548919677734, "step": 100 }, { "epoch": 0.42, "grad_norm": 1.6995760354632239e-09, "learning_rate": 4.666440085318626e-05, "logits/chosen": 0.10824018716812134, "logits/rejected": NaN, "logps/chosen": -879.4221801757812, "logps/rejected": -935.998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.599552154541016, "rewards/margins": 38.348724365234375, "rewards/rejected": -19.74917221069336, "step": 105 }, { "epoch": 0.44, "grad_norm": 2.936039411594038e-09, "learning_rate": 4.6240053218659674e-05, "logits/chosen": 0.13882726430892944, "logits/rejected": NaN, "logps/chosen": -952.7330932617188, "logps/rejected": -1139.3414306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 19.053531646728516, "rewards/margins": 39.75517272949219, "rewards/rejected": -20.701641082763672, "step": 110 }, { "epoch": 0.46, "grad_norm": 6.191885337791803e-10, "learning_rate": 4.579247989778179e-05, "logits/chosen": -0.0359259769320488, "logits/rejected": NaN, "logps/chosen": -923.9146728515625, "logps/rejected": -1042.310302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 20.208402633666992, "rewards/margins": 40.31129455566406, "rewards/rejected": -20.102888107299805, "step": 115 }, { "epoch": 0.48, "grad_norm": 1.4515948122337252e-12, "learning_rate": 4.532217030540781e-05, "logits/chosen": 0.15440431237220764, "logits/rejected": -1.2660012245178223, "logps/chosen": -851.1140747070312, "logps/rejected": -1047.481201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 19.158273696899414, "rewards/margins": 40.312068939208984, "rewards/rejected": -21.153797149658203, "step": 120 }, { "epoch": 0.5, "grad_norm": 1.5757608759492392e-11, "learning_rate": 4.4829638718171954e-05, "logits/chosen": 0.3456230163574219, "logits/rejected": -1.1057803630828857, "logps/chosen": -893.2120361328125, "logps/rejected": -1084.09228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 17.828678131103516, "rewards/margins": 38.39183807373047, "rewards/rejected": -20.563154220581055, "step": 125 }, { "epoch": 0.52, "grad_norm": 2.1954889349671408e-13, "learning_rate": 4.4315423712133595e-05, "logits/chosen": -0.05226398631930351, "logits/rejected": -1.1536673307418823, "logps/chosen": -912.1346435546875, "logps/rejected": -1105.4722900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.78438377380371, "rewards/margins": 39.1011962890625, "rewards/rejected": -20.316814422607422, "step": 130 }, { "epoch": 0.54, "grad_norm": 5.803347819660631e-12, "learning_rate": 4.3780087573852213e-05, "logits/chosen": -0.027118748053908348, "logits/rejected": NaN, "logps/chosen": -879.7356567382812, "logps/rejected": -1004.5798950195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 19.120731353759766, "rewards/margins": 39.363121032714844, "rewards/rejected": -20.242389678955078, "step": 135 }, { "epoch": 0.56, "grad_norm": 3.3039590507932814e-10, "learning_rate": 4.3224215685535294e-05, "logits/chosen": 0.14910843968391418, "logits/rejected": -1.2413218021392822, "logps/chosen": -663.5389404296875, "logps/rejected": -1058.497314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 17.181045532226562, "rewards/margins": 37.49201202392578, "rewards/rejected": -20.310962677001953, "step": 140 }, { "epoch": 0.58, "grad_norm": 8.77260845300043e-07, "learning_rate": 4.2648415884931476e-05, "logits/chosen": 0.055362772196531296, "logits/rejected": -1.4401803016662598, "logps/chosen": -692.7725830078125, "logps/rejected": -937.9246215820312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 17.841815948486328, "rewards/margins": 36.7285041809082, "rewards/rejected": -18.886686325073242, "step": 145 }, { "epoch": 0.6, "grad_norm": 9.572725065702583e-15, "learning_rate": 4.205331780066892e-05, "logits/chosen": 0.10420503467321396, "logits/rejected": -1.265973687171936, "logps/chosen": -759.1443481445312, "logps/rejected": -1018.9404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.249958038330078, "rewards/margins": 38.52296447753906, "rewards/rejected": -20.273006439208984, "step": 150 }, { "epoch": 0.62, "grad_norm": 1.0893125811095583e-11, "learning_rate": 4.143957216376561e-05, "logits/chosen": 0.39625436067581177, "logits/rejected": NaN, "logps/chosen": -825.2239990234375, "logps/rejected": -1143.4208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 17.700899124145508, "rewards/margins": 38.6351432800293, "rewards/rejected": -20.934249877929688, "step": 155 }, { "epoch": 0.64, "grad_norm": 2.4590178782091243e-07, "learning_rate": 4.0807850096064605e-05, "logits/chosen": -0.03116157092154026, "logits/rejected": NaN, "logps/chosen": -857.17431640625, "logps/rejected": -1014.2100830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 19.429927825927734, "rewards/margins": 40.577064514160156, "rewards/rejected": -21.14713478088379, "step": 160 }, { "epoch": 0.66, "grad_norm": 6.329039514696433e-10, "learning_rate": 4.0158842376372064e-05, "logits/chosen": 0.09649789333343506, "logits/rejected": -1.1944949626922607, "logps/chosen": -899.2138671875, "logps/rejected": -1035.7169189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.823169708251953, "rewards/margins": 39.15609359741211, "rewards/rejected": -20.33292579650879, "step": 165 }, { "epoch": 0.68, "grad_norm": 2.8029356613501477e-11, "learning_rate": 3.949325868510083e-05, "logits/chosen": 0.20623020827770233, "logits/rejected": -1.129809856414795, "logps/chosen": -864.5255126953125, "logps/rejected": -1091.5992431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 17.275066375732422, "rewards/margins": 37.25093460083008, "rewards/rejected": -19.975866317749023, "step": 170 }, { "epoch": 0.7, "grad_norm": 3.3411516825801457e-13, "learning_rate": 3.8811826828245334e-05, "logits/chosen": 0.14641737937927246, "logits/rejected": -1.1825670003890991, "logps/chosen": -762.6082153320312, "logps/rejected": -1034.0648193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 19.018020629882812, "rewards/margins": 38.99062728881836, "rewards/rejected": -19.972606658935547, "step": 175 }, { "epoch": 0.72, "grad_norm": 2.0999298627017923e-14, "learning_rate": 3.8115291941536345e-05, "logits/chosen": 0.10354921966791153, "logits/rejected": -1.275838017463684, "logps/chosen": -829.5315551757812, "logps/rejected": -1124.622314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.930105209350586, "rewards/margins": 41.81705093383789, "rewards/rejected": -22.886947631835938, "step": 180 }, { "epoch": 0.74, "grad_norm": 2.645327750253057e-12, "learning_rate": 3.7404415675646054e-05, "logits/chosen": 0.031611260026693344, "logits/rejected": -1.3882781267166138, "logps/chosen": -694.6297607421875, "logps/rejected": -1017.4817504882812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 19.6253662109375, "rewards/margins": 39.39641189575195, "rewards/rejected": -19.77104377746582, "step": 185 }, { "epoch": 0.76, "grad_norm": 6.707449873299964e-11, "learning_rate": 3.667997536333424e-05, "logits/chosen": 0.24062354862689972, "logits/rejected": NaN, "logps/chosen": -938.1993408203125, "logps/rejected": -1000.3816528320312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 19.56690216064453, "rewards/margins": 38.47909927368164, "rewards/rejected": -18.912195205688477, "step": 190 }, { "epoch": 0.78, "grad_norm": 8.01747113143847e-09, "learning_rate": 3.5942763169446295e-05, "logits/chosen": 0.24746160209178925, "logits/rejected": -1.2341853380203247, "logps/chosen": -773.4998779296875, "logps/rejected": -1065.763916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 16.8951416015625, "rewards/margins": 37.755271911621094, "rewards/rejected": -20.86013412475586, "step": 195 }, { "epoch": 0.8, "grad_norm": 6.772403471355659e-11, "learning_rate": 3.519358522469259e-05, "logits/chosen": -0.09942921251058578, "logits/rejected": NaN, "logps/chosen": -756.4301147460938, "logps/rejected": -936.4187622070312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 20.048839569091797, "rewards/margins": 39.792686462402344, "rewards/rejected": -19.743852615356445, "step": 200 }, { "epoch": 0.82, "grad_norm": 1.032920859955766e-09, "learning_rate": 3.4433260744156396e-05, "logits/chosen": 0.1056319922208786, "logits/rejected": -1.3145227432250977, "logps/chosen": -784.6891479492188, "logps/rejected": -1036.238525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 19.342937469482422, "rewards/margins": 40.68000030517578, "rewards/rejected": -21.337064743041992, "step": 205 }, { "epoch": 0.84, "grad_norm": 2.7231743526812124e-09, "learning_rate": 3.3662621131494204e-05, "logits/chosen": 0.12614893913269043, "logits/rejected": -1.2963688373565674, "logps/chosen": -781.6393432617188, "logps/rejected": -1004.7862548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 17.965452194213867, "rewards/margins": 37.53193664550781, "rewards/rejected": -19.566486358642578, "step": 210 }, { "epoch": 0.86, "grad_norm": 2.9980299492970985e-11, "learning_rate": 3.2882509069808044e-05, "logits/chosen": 0.17609557509422302, "logits/rejected": -1.2237725257873535, "logps/chosen": -860.0303955078125, "logps/rejected": -1028.668701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 20.395902633666992, "rewards/margins": 41.38351058959961, "rewards/rejected": -20.987613677978516, "step": 215 }, { "epoch": 0.88, "grad_norm": 4.3077050613908336e-15, "learning_rate": 3.2093777600183875e-05, "logits/chosen": 0.19297495484352112, "logits/rejected": -1.239027738571167, "logps/chosen": -742.4468994140625, "logps/rejected": -979.86572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.114458084106445, "rewards/margins": 37.93598556518555, "rewards/rejected": -19.821529388427734, "step": 220 }, { "epoch": 0.9, "grad_norm": 7.659513578950339e-10, "learning_rate": 3.129728918890371e-05, "logits/chosen": 0.04276902601122856, "logits/rejected": -1.2328051328659058, "logps/chosen": -925.1580200195312, "logps/rejected": -1018.6541748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.087682723999023, "rewards/margins": 37.770076751708984, "rewards/rejected": -19.68239402770996, "step": 225 }, { "epoch": 0.92, "grad_norm": 1.3605957249751555e-08, "learning_rate": 3.0493914784351328e-05, "logits/chosen": 0.1790432333946228, "logits/rejected": -1.4544163942337036, "logps/chosen": -825.8486328125, "logps/rejected": -938.1361083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.809961318969727, "rewards/margins": 38.43498611450195, "rewards/rejected": -19.62502670288086, "step": 230 }, { "epoch": 0.94, "grad_norm": 1.6664260249488194e-11, "learning_rate": 2.9684532864643122e-05, "logits/chosen": 0.2744317650794983, "logits/rejected": NaN, "logps/chosen": -817.6921997070312, "logps/rejected": -1043.99462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 16.49943733215332, "rewards/margins": 37.538238525390625, "rewards/rejected": -21.038799285888672, "step": 235 }, { "epoch": 0.96, "grad_norm": 1.0462511662156365e-13, "learning_rate": 2.8870028477025042e-05, "logits/chosen": 0.19406965374946594, "logits/rejected": NaN, "logps/chosen": -763.9988403320312, "logps/rejected": -1111.3155517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 17.56269073486328, "rewards/margins": 39.26129913330078, "rewards/rejected": -21.698612213134766, "step": 240 }, { "epoch": 0.98, "grad_norm": 8.816194281280332e-07, "learning_rate": 2.8051292270086503e-05, "logits/chosen": 0.19585652649402618, "logits/rejected": -1.20717453956604, "logps/chosen": -778.9284057617188, "logps/rejected": -1086.0411376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.085580825805664, "rewards/margins": 37.675872802734375, "rewards/rejected": -19.590295791625977, "step": 245 }, { "epoch": 1.0, "grad_norm": 1.960840691206389e-11, "learning_rate": 2.722921951984927e-05, "logits/chosen": 0.255943238735199, "logits/rejected": NaN, "logps/chosen": -806.9491577148438, "logps/rejected": -977.7510986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 18.35710906982422, "rewards/margins": 37.92182540893555, "rewards/rejected": -19.56471824645996, "step": 250 } ], "logging_steps": 5, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }