| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 250, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 32.68030548095703, | |
| "learning_rate": 8.000000000000001e-06, | |
| "logits/chosen": -1.6396703720092773, | |
| "logits/rejected": -4.051800727844238, | |
| "logps/chosen": -991.9453125, | |
| "logps/rejected": -804.3565673828125, | |
| "loss": 0.6639, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": 0.03835174813866615, | |
| "rewards/margins": 0.06670190393924713, | |
| "rewards/rejected": -0.028350140899419785, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 15.304901123046875, | |
| "learning_rate": 1.8e-05, | |
| "logits/chosen": -1.3240370750427246, | |
| "logits/rejected": -3.8731656074523926, | |
| "logps/chosen": -1051.1619873046875, | |
| "logps/rejected": -844.4005126953125, | |
| "loss": 0.4827, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.36078932881355286, | |
| "rewards/margins": 0.5343167185783386, | |
| "rewards/rejected": -0.17352741956710815, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.7237646579742432, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "logits/chosen": -1.3096798658370972, | |
| "logits/rejected": -3.6570611000061035, | |
| "logps/chosen": -1035.888427734375, | |
| "logps/rejected": -842.5685424804688, | |
| "loss": 0.0832, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.0725021362304688, | |
| "rewards/margins": 3.071218729019165, | |
| "rewards/rejected": -0.9987167119979858, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.0076161520555615425, | |
| "learning_rate": 3.8e-05, | |
| "logits/chosen": -0.8284958004951477, | |
| "logits/rejected": -2.9240927696228027, | |
| "logps/chosen": -870.0358276367188, | |
| "logps/rejected": -1002.2626953125, | |
| "loss": 0.0019, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.352972507476807, | |
| "rewards/margins": 8.709424018859863, | |
| "rewards/rejected": -3.3564505577087402, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.004960057325661182, | |
| "learning_rate": 4.8e-05, | |
| "logits/chosen": -0.6791419982910156, | |
| "logits/rejected": null, | |
| "logps/chosen": -874.9759521484375, | |
| "logps/rejected": -876.6950073242188, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 10.639605522155762, | |
| "rewards/margins": 17.804119110107422, | |
| "rewards/rejected": -7.164514064788818, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.3971771295473445e-07, | |
| "learning_rate": 4.999125183044924e-05, | |
| "logits/chosen": -0.34096163511276245, | |
| "logits/rejected": null, | |
| "logps/chosen": -939.2772216796875, | |
| "logps/rejected": -893.369140625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 14.58117961883545, | |
| "rewards/margins": 25.044315338134766, | |
| "rewards/rejected": -10.463134765625, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 1.2492258338880902e-09, | |
| "learning_rate": 4.9955722884434114e-05, | |
| "logits/chosen": -0.140629380941391, | |
| "logits/rejected": -1.5191800594329834, | |
| "logps/chosen": -808.206298828125, | |
| "logps/rejected": -1101.503662109375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 15.305819511413574, | |
| "rewards/margins": 30.041492462158203, | |
| "rewards/rejected": -14.735674858093262, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 8.451875288528754e-08, | |
| "learning_rate": 4.98929052218411e-05, | |
| "logits/chosen": -0.08291908353567123, | |
| "logits/rejected": -1.5926605463027954, | |
| "logps/chosen": -799.0426025390625, | |
| "logps/rejected": -941.26025390625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 16.93221664428711, | |
| "rewards/margins": 32.224510192871094, | |
| "rewards/rejected": -15.292287826538086, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 6.276909547686671e-11, | |
| "learning_rate": 4.980286753286195e-05, | |
| "logits/chosen": -0.07936549186706543, | |
| "logits/rejected": -1.6235363483428955, | |
| "logps/chosen": -739.1414794921875, | |
| "logps/rejected": -916.921875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.33672523498535, | |
| "rewards/margins": 34.177513122558594, | |
| "rewards/rejected": -15.840789794921875, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 6.576159755633526e-11, | |
| "learning_rate": 4.968570827238764e-05, | |
| "logits/chosen": -0.01292533241212368, | |
| "logits/rejected": -1.3671391010284424, | |
| "logps/chosen": -804.4974365234375, | |
| "logps/rejected": -1069.5924072265625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.07866668701172, | |
| "rewards/margins": 38.214569091796875, | |
| "rewards/rejected": -20.13589859008789, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 9.919030007077279e-12, | |
| "learning_rate": 4.95415555523494e-05, | |
| "logits/chosen": 0.1709265410900116, | |
| "logits/rejected": -1.2384920120239258, | |
| "logps/chosen": -947.8603515625, | |
| "logps/rejected": -1064.900146484375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.031835556030273, | |
| "rewards/margins": 38.730384826660156, | |
| "rewards/rejected": -19.698551177978516, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 8.758817560128307e-10, | |
| "learning_rate": 4.937056700163015e-05, | |
| "logits/chosen": 0.06906407326459885, | |
| "logits/rejected": -1.3715310096740723, | |
| "logps/chosen": -822.6943359375, | |
| "logps/rejected": -1030.0531005859375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.58559799194336, | |
| "rewards/margins": 39.85332107543945, | |
| "rewards/rejected": -20.267719268798828, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 7.939841546820287e-14, | |
| "learning_rate": 4.917292959369968e-05, | |
| "logits/chosen": 0.12646104395389557, | |
| "logits/rejected": -1.2485965490341187, | |
| "logps/chosen": -848.3450927734375, | |
| "logps/rejected": -1009.3894653320312, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.458877563476562, | |
| "rewards/margins": 38.36582946777344, | |
| "rewards/rejected": -19.906951904296875, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 5.7889120164134056e-09, | |
| "learning_rate": 4.8948859442161874e-05, | |
| "logits/chosen": -0.05368973687291145, | |
| "logits/rejected": -1.1779694557189941, | |
| "logps/chosen": -843.9136962890625, | |
| "logps/rejected": -1093.84765625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.823673248291016, | |
| "rewards/margins": 40.01471710205078, | |
| "rewards/rejected": -21.1910457611084, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 2.912129630321114e-13, | |
| "learning_rate": 4.8698601564437675e-05, | |
| "logits/chosen": 0.21445438265800476, | |
| "logits/rejected": -1.3090651035308838, | |
| "logps/chosen": -829.6139526367188, | |
| "logps/rejected": -967.4557495117188, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.882862091064453, | |
| "rewards/margins": 38.040679931640625, | |
| "rewards/rejected": -19.157821655273438, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 4.708504026669329e-14, | |
| "learning_rate": 4.842242961384211e-05, | |
| "logits/chosen": 0.24517253041267395, | |
| "logits/rejected": -1.157088041305542, | |
| "logps/chosen": -695.1763305664062, | |
| "logps/rejected": -1113.8963623046875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.196081161499023, | |
| "rewards/margins": 39.54194259643555, | |
| "rewards/rejected": -21.34585952758789, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 2.941465566128412e-14, | |
| "learning_rate": 4.812064558034847e-05, | |
| "logits/chosen": 0.2809743285179138, | |
| "logits/rejected": null, | |
| "logps/chosen": -787.4198608398438, | |
| "logps/rejected": -980.9554443359375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 17.84877586364746, | |
| "rewards/margins": 38.223331451416016, | |
| "rewards/rejected": -20.374557495117188, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 3.4191766171837168e-12, | |
| "learning_rate": 4.779357946036661e-05, | |
| "logits/chosen": 0.09962861239910126, | |
| "logits/rejected": null, | |
| "logps/chosen": -801.5222778320312, | |
| "logps/rejected": -1071.500732421875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 17.405176162719727, | |
| "rewards/margins": 38.406497955322266, | |
| "rewards/rejected": -21.00132179260254, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 4.459133756684608e-11, | |
| "learning_rate": 4.7441588895896805e-05, | |
| "logits/chosen": 0.2806223928928375, | |
| "logits/rejected": -1.0863935947418213, | |
| "logps/chosen": -767.6072998046875, | |
| "logps/rejected": -1064.6678466796875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.554723739624023, | |
| "rewards/margins": 37.88628387451172, | |
| "rewards/rejected": -19.33156394958496, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.7513205783584868e-13, | |
| "learning_rate": 4.7065058783453424e-05, | |
| "logits/chosen": 0.056603264063596725, | |
| "logits/rejected": -1.2640782594680786, | |
| "logps/chosen": -806.6994018554688, | |
| "logps/rejected": -1040.3111572265625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.12691879272461, | |
| "rewards/margins": 39.593467712402344, | |
| "rewards/rejected": -20.466548919677734, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.6995760354632239e-09, | |
| "learning_rate": 4.666440085318626e-05, | |
| "logits/chosen": 0.10824018716812134, | |
| "logits/rejected": null, | |
| "logps/chosen": -879.4221801757812, | |
| "logps/rejected": -935.998046875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.599552154541016, | |
| "rewards/margins": 38.348724365234375, | |
| "rewards/rejected": -19.74917221069336, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 2.936039411594038e-09, | |
| "learning_rate": 4.6240053218659674e-05, | |
| "logits/chosen": 0.13882726430892944, | |
| "logits/rejected": null, | |
| "logps/chosen": -952.7330932617188, | |
| "logps/rejected": -1139.3414306640625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.053531646728516, | |
| "rewards/margins": 39.75517272949219, | |
| "rewards/rejected": -20.701641082763672, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 6.191885337791803e-10, | |
| "learning_rate": 4.579247989778179e-05, | |
| "logits/chosen": -0.0359259769320488, | |
| "logits/rejected": null, | |
| "logps/chosen": -923.9146728515625, | |
| "logps/rejected": -1042.310302734375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.208402633666992, | |
| "rewards/margins": 40.31129455566406, | |
| "rewards/rejected": -20.102888107299805, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.4515948122337252e-12, | |
| "learning_rate": 4.532217030540781e-05, | |
| "logits/chosen": 0.15440431237220764, | |
| "logits/rejected": -1.2660012245178223, | |
| "logps/chosen": -851.1140747070312, | |
| "logps/rejected": -1047.481201171875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.158273696899414, | |
| "rewards/margins": 40.312068939208984, | |
| "rewards/rejected": -21.153797149658203, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.5757608759492392e-11, | |
| "learning_rate": 4.4829638718171954e-05, | |
| "logits/chosen": 0.3456230163574219, | |
| "logits/rejected": -1.1057803630828857, | |
| "logps/chosen": -893.2120361328125, | |
| "logps/rejected": -1084.09228515625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 17.828678131103516, | |
| "rewards/margins": 38.39183807373047, | |
| "rewards/rejected": -20.563154220581055, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 2.1954889349671408e-13, | |
| "learning_rate": 4.4315423712133595e-05, | |
| "logits/chosen": -0.05226398631930351, | |
| "logits/rejected": -1.1536673307418823, | |
| "logps/chosen": -912.1346435546875, | |
| "logps/rejected": -1105.4722900390625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.78438377380371, | |
| "rewards/margins": 39.1011962890625, | |
| "rewards/rejected": -20.316814422607422, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 5.803347819660631e-12, | |
| "learning_rate": 4.3780087573852213e-05, | |
| "logits/chosen": -0.027118748053908348, | |
| "logits/rejected": null, | |
| "logps/chosen": -879.7356567382812, | |
| "logps/rejected": -1004.5798950195312, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.120731353759766, | |
| "rewards/margins": 39.363121032714844, | |
| "rewards/rejected": -20.242389678955078, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 3.3039590507932814e-10, | |
| "learning_rate": 4.3224215685535294e-05, | |
| "logits/chosen": 0.14910843968391418, | |
| "logits/rejected": -1.2413218021392822, | |
| "logps/chosen": -663.5389404296875, | |
| "logps/rejected": -1058.497314453125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 17.181045532226562, | |
| "rewards/margins": 37.49201202392578, | |
| "rewards/rejected": -20.310962677001953, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 8.77260845300043e-07, | |
| "learning_rate": 4.2648415884931476e-05, | |
| "logits/chosen": 0.055362772196531296, | |
| "logits/rejected": -1.4401803016662598, | |
| "logps/chosen": -692.7725830078125, | |
| "logps/rejected": -937.9246215820312, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 17.841815948486328, | |
| "rewards/margins": 36.7285041809082, | |
| "rewards/rejected": -18.886686325073242, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 9.572725065702583e-15, | |
| "learning_rate": 4.205331780066892e-05, | |
| "logits/chosen": 0.10420503467321396, | |
| "logits/rejected": -1.265973687171936, | |
| "logps/chosen": -759.1443481445312, | |
| "logps/rejected": -1018.9404296875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.249958038330078, | |
| "rewards/margins": 38.52296447753906, | |
| "rewards/rejected": -20.273006439208984, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 1.0893125811095583e-11, | |
| "learning_rate": 4.143957216376561e-05, | |
| "logits/chosen": 0.39625436067581177, | |
| "logits/rejected": null, | |
| "logps/chosen": -825.2239990234375, | |
| "logps/rejected": -1143.4208984375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 17.700899124145508, | |
| "rewards/margins": 38.6351432800293, | |
| "rewards/rejected": -20.934249877929688, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 2.4590178782091243e-07, | |
| "learning_rate": 4.0807850096064605e-05, | |
| "logits/chosen": -0.03116157092154026, | |
| "logits/rejected": null, | |
| "logps/chosen": -857.17431640625, | |
| "logps/rejected": -1014.2100830078125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.429927825927734, | |
| "rewards/margins": 40.577064514160156, | |
| "rewards/rejected": -21.14713478088379, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 6.329039514696433e-10, | |
| "learning_rate": 4.0158842376372064e-05, | |
| "logits/chosen": 0.09649789333343506, | |
| "logits/rejected": -1.1944949626922607, | |
| "logps/chosen": -899.2138671875, | |
| "logps/rejected": -1035.7169189453125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.823169708251953, | |
| "rewards/margins": 39.15609359741211, | |
| "rewards/rejected": -20.33292579650879, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 2.8029356613501477e-11, | |
| "learning_rate": 3.949325868510083e-05, | |
| "logits/chosen": 0.20623020827770233, | |
| "logits/rejected": -1.129809856414795, | |
| "logps/chosen": -864.5255126953125, | |
| "logps/rejected": -1091.5992431640625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 17.275066375732422, | |
| "rewards/margins": 37.25093460083008, | |
| "rewards/rejected": -19.975866317749023, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 3.3411516825801457e-13, | |
| "learning_rate": 3.8811826828245334e-05, | |
| "logits/chosen": 0.14641737937927246, | |
| "logits/rejected": -1.1825670003890991, | |
| "logps/chosen": -762.6082153320312, | |
| "logps/rejected": -1034.0648193359375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.018020629882812, | |
| "rewards/margins": 38.99062728881836, | |
| "rewards/rejected": -19.972606658935547, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 2.0999298627017923e-14, | |
| "learning_rate": 3.8115291941536345e-05, | |
| "logits/chosen": 0.10354921966791153, | |
| "logits/rejected": -1.275838017463684, | |
| "logps/chosen": -829.5315551757812, | |
| "logps/rejected": -1124.622314453125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.930105209350586, | |
| "rewards/margins": 41.81705093383789, | |
| "rewards/rejected": -22.886947631835938, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 2.645327750253057e-12, | |
| "learning_rate": 3.7404415675646054e-05, | |
| "logits/chosen": 0.031611260026693344, | |
| "logits/rejected": -1.3882781267166138, | |
| "logps/chosen": -694.6297607421875, | |
| "logps/rejected": -1017.4817504882812, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.6253662109375, | |
| "rewards/margins": 39.39641189575195, | |
| "rewards/rejected": -19.77104377746582, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 6.707449873299964e-11, | |
| "learning_rate": 3.667997536333424e-05, | |
| "logits/chosen": 0.24062354862689972, | |
| "logits/rejected": null, | |
| "logps/chosen": -938.1993408203125, | |
| "logps/rejected": -1000.3816528320312, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.56690216064453, | |
| "rewards/margins": 38.47909927368164, | |
| "rewards/rejected": -18.912195205688477, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 8.01747113143847e-09, | |
| "learning_rate": 3.5942763169446295e-05, | |
| "logits/chosen": 0.24746160209178925, | |
| "logits/rejected": -1.2341853380203247, | |
| "logps/chosen": -773.4998779296875, | |
| "logps/rejected": -1065.763916015625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 16.8951416015625, | |
| "rewards/margins": 37.755271911621094, | |
| "rewards/rejected": -20.86013412475586, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 6.772403471355659e-11, | |
| "learning_rate": 3.519358522469259e-05, | |
| "logits/chosen": -0.09942921251058578, | |
| "logits/rejected": null, | |
| "logps/chosen": -756.4301147460938, | |
| "logps/rejected": -936.4187622070312, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.048839569091797, | |
| "rewards/margins": 39.792686462402344, | |
| "rewards/rejected": -19.743852615356445, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.032920859955766e-09, | |
| "learning_rate": 3.4433260744156396e-05, | |
| "logits/chosen": 0.1056319922208786, | |
| "logits/rejected": -1.3145227432250977, | |
| "logps/chosen": -784.6891479492188, | |
| "logps/rejected": -1036.238525390625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.342937469482422, | |
| "rewards/margins": 40.68000030517578, | |
| "rewards/rejected": -21.337064743041992, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 2.7231743526812124e-09, | |
| "learning_rate": 3.3662621131494204e-05, | |
| "logits/chosen": 0.12614893913269043, | |
| "logits/rejected": -1.2963688373565674, | |
| "logps/chosen": -781.6393432617188, | |
| "logps/rejected": -1004.7862548828125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 17.965452194213867, | |
| "rewards/margins": 37.53193664550781, | |
| "rewards/rejected": -19.566486358642578, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 2.9980299492970985e-11, | |
| "learning_rate": 3.2882509069808044e-05, | |
| "logits/chosen": 0.17609557509422302, | |
| "logits/rejected": -1.2237725257873535, | |
| "logps/chosen": -860.0303955078125, | |
| "logps/rejected": -1028.668701171875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.395902633666992, | |
| "rewards/margins": 41.38351058959961, | |
| "rewards/rejected": -20.987613677978516, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 4.3077050613908336e-15, | |
| "learning_rate": 3.2093777600183875e-05, | |
| "logits/chosen": 0.19297495484352112, | |
| "logits/rejected": -1.239027738571167, | |
| "logps/chosen": -742.4468994140625, | |
| "logps/rejected": -979.86572265625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.114458084106445, | |
| "rewards/margins": 37.93598556518555, | |
| "rewards/rejected": -19.821529388427734, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 7.659513578950339e-10, | |
| "learning_rate": 3.129728918890371e-05, | |
| "logits/chosen": 0.04276902601122856, | |
| "logits/rejected": -1.2328051328659058, | |
| "logps/chosen": -925.1580200195312, | |
| "logps/rejected": -1018.6541748046875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.087682723999023, | |
| "rewards/margins": 37.770076751708984, | |
| "rewards/rejected": -19.68239402770996, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.3605957249751555e-08, | |
| "learning_rate": 3.0493914784351328e-05, | |
| "logits/chosen": 0.1790432333946228, | |
| "logits/rejected": -1.4544163942337036, | |
| "logps/chosen": -825.8486328125, | |
| "logps/rejected": -938.1361083984375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.809961318969727, | |
| "rewards/margins": 38.43498611450195, | |
| "rewards/rejected": -19.62502670288086, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.6664260249488194e-11, | |
| "learning_rate": 2.9684532864643122e-05, | |
| "logits/chosen": 0.2744317650794983, | |
| "logits/rejected": null, | |
| "logps/chosen": -817.6921997070312, | |
| "logps/rejected": -1043.99462890625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 16.49943733215332, | |
| "rewards/margins": 37.538238525390625, | |
| "rewards/rejected": -21.038799285888672, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.0462511662156365e-13, | |
| "learning_rate": 2.8870028477025042e-05, | |
| "logits/chosen": 0.19406965374946594, | |
| "logits/rejected": null, | |
| "logps/chosen": -763.9988403320312, | |
| "logps/rejected": -1111.3155517578125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 17.56269073486328, | |
| "rewards/margins": 39.26129913330078, | |
| "rewards/rejected": -21.698612213134766, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 8.816194281280332e-07, | |
| "learning_rate": 2.8051292270086503e-05, | |
| "logits/chosen": 0.19585652649402618, | |
| "logits/rejected": -1.20717453956604, | |
| "logps/chosen": -778.9284057617188, | |
| "logps/rejected": -1086.0411376953125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.085580825805664, | |
| "rewards/margins": 37.675872802734375, | |
| "rewards/rejected": -19.590295791625977, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.960840691206389e-11, | |
| "learning_rate": 2.722921951984927e-05, | |
| "logits/chosen": 0.255943238735199, | |
| "logits/rejected": null, | |
| "logps/chosen": -806.9491577148438, | |
| "logps/rejected": -977.7510986328125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.35710906982422, | |
| "rewards/margins": 37.92182540893555, | |
| "rewards/rejected": -19.56471824645996, | |
| "step": 250 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |