| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 1000, | |
| "global_step": 375, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.3157894736842104e-08, | |
| "logits/chosen": -0.5324900150299072, | |
| "logits/rejected": -0.5734304189682007, | |
| "logps/chosen": -543.2296752929688, | |
| "logps/rejected": -325.48358154296875, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/mix_margin": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.3157894736842104e-07, | |
| "logits/chosen": -0.48575523495674133, | |
| "logits/rejected": -0.5831019878387451, | |
| "logps/chosen": -334.6309509277344, | |
| "logps/rejected": -278.2859802246094, | |
| "loss": 0.6997, | |
| "rewards/accuracies": 0.4027777910232544, | |
| "rewards/chosen": -0.012853524647653103, | |
| "rewards/confidence": -0.0746772438287735, | |
| "rewards/confidence_mean_diff": 0.0746772438287735, | |
| "rewards/confidence_moving_diff": 0.0021637948229908943, | |
| "rewards/margins": -0.007044664584100246, | |
| "rewards/mix_margin": -0.007044283673167229, | |
| "rewards/real_percentage": 14.129032135009766, | |
| "rewards/rejected": -0.005808859597891569, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.631578947368421e-07, | |
| "logits/chosen": -0.45206984877586365, | |
| "logits/rejected": -0.4436320662498474, | |
| "logps/chosen": -378.46478271484375, | |
| "logps/rejected": -291.097412109375, | |
| "loss": 0.687, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": 0.019860025495290756, | |
| "rewards/confidence": -0.07699747383594513, | |
| "rewards/confidence_mean_diff": 0.07699747383594513, | |
| "rewards/confidence_moving_diff": -6.244657561182976e-05, | |
| "rewards/margins": 0.010339610278606415, | |
| "rewards/mix_margin": 0.010339389555156231, | |
| "rewards/real_percentage": 11.975000381469727, | |
| "rewards/rejected": 0.009520411491394043, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.9473684210526315e-07, | |
| "logits/chosen": -0.48425692319869995, | |
| "logits/rejected": -0.5238968133926392, | |
| "logps/chosen": -363.4825439453125, | |
| "logps/rejected": -330.880859375, | |
| "loss": 0.6746, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.07081757485866547, | |
| "rewards/confidence": -0.0583333782851696, | |
| "rewards/confidence_mean_diff": 0.0583333782851696, | |
| "rewards/confidence_moving_diff": 0.00017116544768214226, | |
| "rewards/margins": 0.04097529500722885, | |
| "rewards/mix_margin": 0.04097532853484154, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": 0.029842281714081764, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.999565492409831e-07, | |
| "logits/chosen": -0.47305864095687866, | |
| "logits/rejected": -0.582284152507782, | |
| "logps/chosen": -335.81610107421875, | |
| "logps/rejected": -256.0378723144531, | |
| "loss": 0.6474, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.16260090470314026, | |
| "rewards/confidence": -0.11778082698583603, | |
| "rewards/confidence_mean_diff": 0.11778082698583603, | |
| "rewards/confidence_moving_diff": 0.0008547043544240296, | |
| "rewards/margins": 0.05970517918467522, | |
| "rewards/mix_margin": 0.05970512703061104, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": 0.10289572179317474, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.984373579809777e-07, | |
| "logits/chosen": -0.5092490911483765, | |
| "logits/rejected": -0.5690798163414001, | |
| "logps/chosen": -329.53302001953125, | |
| "logps/rejected": -295.02294921875, | |
| "loss": 0.5866, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.3996647000312805, | |
| "rewards/confidence": -0.14608541131019592, | |
| "rewards/confidence_mean_diff": 0.14608541131019592, | |
| "rewards/confidence_moving_diff": -0.00040556181920692325, | |
| "rewards/margins": 0.20342092216014862, | |
| "rewards/mix_margin": 0.2034207135438919, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": 0.1962437778711319, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.947607089353757e-07, | |
| "logits/chosen": -0.4855988025665283, | |
| "logits/rejected": -0.5692173838615417, | |
| "logps/chosen": -365.7965393066406, | |
| "logps/rejected": -290.7939147949219, | |
| "loss": 0.6262, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.6372241973876953, | |
| "rewards/confidence": -0.27295011281967163, | |
| "rewards/confidence_mean_diff": 0.27295011281967163, | |
| "rewards/confidence_moving_diff": -8.605476614320651e-05, | |
| "rewards/margins": 0.25993281602859497, | |
| "rewards/mix_margin": 0.2599331736564636, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": 0.37729138135910034, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.889585305354435e-07, | |
| "logits/chosen": -0.511881411075592, | |
| "logits/rejected": -0.5559085607528687, | |
| "logps/chosen": -374.42559814453125, | |
| "logps/rejected": -350.49285888671875, | |
| "loss": 0.5776, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 0.938610851764679, | |
| "rewards/confidence": -0.17002172768115997, | |
| "rewards/confidence_mean_diff": 0.17002172768115997, | |
| "rewards/confidence_moving_diff": 0.006227460689842701, | |
| "rewards/margins": 0.39301368594169617, | |
| "rewards/mix_margin": 0.39301276206970215, | |
| "rewards/real_percentage": 12.199999809265137, | |
| "rewards/rejected": 0.5455971360206604, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.810812095469401e-07, | |
| "logits/chosen": -0.4341855049133301, | |
| "logits/rejected": -0.4922330975532532, | |
| "logps/chosen": -382.85986328125, | |
| "logps/rejected": -316.0860595703125, | |
| "loss": 0.4931, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": 1.085030198097229, | |
| "rewards/confidence": -0.36220604181289673, | |
| "rewards/confidence_mean_diff": 0.36220604181289673, | |
| "rewards/confidence_moving_diff": -0.004994163755327463, | |
| "rewards/margins": 0.727383017539978, | |
| "rewards/mix_margin": 0.7273828387260437, | |
| "rewards/real_percentage": 11.899999618530273, | |
| "rewards/rejected": 0.3576471507549286, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.711971535058109e-07, | |
| "logits/chosen": -0.4119408130645752, | |
| "logits/rejected": -0.5046309232711792, | |
| "logps/chosen": -335.9080810546875, | |
| "logps/rejected": -228.6096649169922, | |
| "loss": 0.5641, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 0.9102222323417664, | |
| "rewards/confidence": -0.18118831515312195, | |
| "rewards/confidence_mean_diff": 0.18118831515312195, | |
| "rewards/confidence_moving_diff": 0.0009786130394786596, | |
| "rewards/margins": 0.6580663919448853, | |
| "rewards/mix_margin": 0.6580665707588196, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": 0.2521558403968811, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.593921966594997e-07, | |
| "logits/chosen": -0.4459192752838135, | |
| "logits/rejected": -0.4894910454750061, | |
| "logps/chosen": -371.5416259765625, | |
| "logps/rejected": -310.64239501953125, | |
| "loss": 0.5469, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 1.2468544244766235, | |
| "rewards/confidence": -0.45958179235458374, | |
| "rewards/confidence_mean_diff": 0.45958179235458374, | |
| "rewards/confidence_moving_diff": 0.004259251989424229, | |
| "rewards/margins": 0.7577625513076782, | |
| "rewards/mix_margin": 0.757762610912323, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": 0.4890917241573334, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.457688545727496e-07, | |
| "logits/chosen": -0.5113216042518616, | |
| "logits/rejected": -0.5288140177726746, | |
| "logps/chosen": -352.3919982910156, | |
| "logps/rejected": -276.9599304199219, | |
| "loss": 0.5222, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": 1.0378813743591309, | |
| "rewards/confidence": -0.29791101813316345, | |
| "rewards/confidence_mean_diff": 0.29791101813316345, | |
| "rewards/confidence_moving_diff": -0.0015016455436125398, | |
| "rewards/margins": 0.7134403586387634, | |
| "rewards/mix_margin": 0.7134405374526978, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": 0.3244408965110779, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.3044543387098026e-07, | |
| "logits/chosen": -0.5033639669418335, | |
| "logits/rejected": -0.5167360901832581, | |
| "logps/chosen": -323.29119873046875, | |
| "logps/rejected": -265.58221435546875, | |
| "loss": 0.5039, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": 1.0098955631256104, | |
| "rewards/confidence": -0.42422398924827576, | |
| "rewards/confidence_mean_diff": 0.42422398924827576, | |
| "rewards/confidence_moving_diff": 0.0008432863396592438, | |
| "rewards/margins": 0.9732195138931274, | |
| "rewards/mix_margin": 0.9732197523117065, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": 0.036676160991191864, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.1355500485232917e-07, | |
| "logits/chosen": -0.4795234203338623, | |
| "logits/rejected": -0.5551981329917908, | |
| "logps/chosen": -367.8242492675781, | |
| "logps/rejected": -284.45062255859375, | |
| "loss": 0.381, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.1803219318389893, | |
| "rewards/confidence": -0.1641966998577118, | |
| "rewards/confidence_mean_diff": 0.1641966998577118, | |
| "rewards/confidence_moving_diff": -0.009292250499129295, | |
| "rewards/margins": 1.2318060398101807, | |
| "rewards/mix_margin": 1.2318063974380493, | |
| "rewards/real_percentage": 11.875, | |
| "rewards/rejected": -0.051483988761901855, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.9524424589030863e-07, | |
| "logits/chosen": -0.47544917464256287, | |
| "logits/rejected": -0.45598697662353516, | |
| "logps/chosen": -368.21197509765625, | |
| "logps/rejected": -327.8885803222656, | |
| "loss": 0.2637, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 1.4148461818695068, | |
| "rewards/confidence": 0.12870559096336365, | |
| "rewards/confidence_mean_diff": -0.12870559096336365, | |
| "rewards/confidence_moving_diff": -4.9034319090424106e-05, | |
| "rewards/margins": 1.949605941772461, | |
| "rewards/mix_margin": 1.9496057033538818, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -0.5347597599029541, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.7567216966241556e-07, | |
| "logits/chosen": -0.5132138133049011, | |
| "logits/rejected": -0.5720852613449097, | |
| "logps/chosen": -349.05706787109375, | |
| "logps/rejected": -309.68194580078125, | |
| "loss": 0.2546, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 1.3815641403198242, | |
| "rewards/confidence": 0.15192195773124695, | |
| "rewards/confidence_mean_diff": -0.15192195773124695, | |
| "rewards/confidence_moving_diff": -0.004711526446044445, | |
| "rewards/margins": 1.766579031944275, | |
| "rewards/mix_margin": 1.766579031944275, | |
| "rewards/real_percentage": 11.925000190734863, | |
| "rewards/rejected": -0.3850148320198059, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.5500874226626633e-07, | |
| "logits/chosen": -0.41593313217163086, | |
| "logits/rejected": -0.47519993782043457, | |
| "logps/chosen": -424.2110290527344, | |
| "logps/rejected": -386.99688720703125, | |
| "loss": 0.2319, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.594541072845459, | |
| "rewards/confidence": 0.3976772129535675, | |
| "rewards/confidence_mean_diff": -0.3976772129535675, | |
| "rewards/confidence_moving_diff": 0.0034655616618692875, | |
| "rewards/margins": 2.2624146938323975, | |
| "rewards/mix_margin": 2.2624149322509766, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -0.667873740196228, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.334334072150074e-07, | |
| "logits/chosen": -0.4277438223361969, | |
| "logits/rejected": -0.44190508127212524, | |
| "logps/chosen": -359.75262451171875, | |
| "logps/rejected": -304.85107421875, | |
| "loss": 0.244, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.3704854249954224, | |
| "rewards/confidence": 0.43457871675491333, | |
| "rewards/confidence_mean_diff": -0.43457871675491333, | |
| "rewards/confidence_moving_diff": 0.0003442527668084949, | |
| "rewards/margins": 2.137328624725342, | |
| "rewards/mix_margin": 2.137328624725342, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -0.766843318939209, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.1113352712978995e-07, | |
| "logits/chosen": -0.4778042733669281, | |
| "logits/rejected": -0.5502051115036011, | |
| "logps/chosen": -285.4638671875, | |
| "logps/rejected": -259.30963134765625, | |
| "loss": 0.2673, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.1096071004867554, | |
| "rewards/confidence": 0.36355599761009216, | |
| "rewards/confidence_mean_diff": -0.36355599761009216, | |
| "rewards/confidence_moving_diff": 2.6996247470378876e-05, | |
| "rewards/margins": 1.878248929977417, | |
| "rewards/mix_margin": 1.878249168395996, | |
| "rewards/real_percentage": 11.975000381469727, | |
| "rewards/rejected": -0.7686418294906616, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.8830275666182565e-07, | |
| "logits/chosen": -0.5888835191726685, | |
| "logits/rejected": -0.5946951508522034, | |
| "logps/chosen": -345.4639587402344, | |
| "logps/rejected": -269.433349609375, | |
| "loss": 0.2581, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 1.4412751197814941, | |
| "rewards/confidence": 0.27181780338287354, | |
| "rewards/confidence_mean_diff": -0.27181780338287354, | |
| "rewards/confidence_moving_diff": -0.0013337878044694662, | |
| "rewards/margins": 1.7757008075714111, | |
| "rewards/mix_margin": 1.7756999731063843, | |
| "rewards/real_percentage": 11.975000381469727, | |
| "rewards/rejected": -0.3344256579875946, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.651393607737495e-07, | |
| "logits/chosen": -0.43257981538772583, | |
| "logits/rejected": -0.5586498975753784, | |
| "logps/chosen": -332.6167907714844, | |
| "logps/rejected": -258.5675354003906, | |
| "loss": 0.2367, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 1.6590750217437744, | |
| "rewards/confidence": 0.20384028553962708, | |
| "rewards/confidence_mean_diff": -0.20384028553962708, | |
| "rewards/confidence_moving_diff": 0.0033724855165928602, | |
| "rewards/margins": 2.299750804901123, | |
| "rewards/mix_margin": 2.299750804901123, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -0.6406754851341248, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.418444929845241e-07, | |
| "logits/chosen": -0.5128785371780396, | |
| "logits/rejected": -0.5602482557296753, | |
| "logps/chosen": -347.55145263671875, | |
| "logps/rejected": -316.63287353515625, | |
| "loss": 0.2366, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 1.4995661973953247, | |
| "rewards/confidence": 0.29339924454689026, | |
| "rewards/confidence_mean_diff": -0.29339924454689026, | |
| "rewards/confidence_moving_diff": -0.002348523121327162, | |
| "rewards/margins": 2.240088939666748, | |
| "rewards/mix_margin": 2.2400896549224854, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": -0.7405228018760681, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.186204485297965e-07, | |
| "logits/chosen": -0.5206685066223145, | |
| "logits/rejected": -0.49740782380104065, | |
| "logps/chosen": -327.6163024902344, | |
| "logps/rejected": -312.23345947265625, | |
| "loss": 0.259, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.3457434177398682, | |
| "rewards/confidence": 0.5061102509498596, | |
| "rewards/confidence_mean_diff": -0.5061102509498596, | |
| "rewards/confidence_moving_diff": 0.0032404728699475527, | |
| "rewards/margins": 2.1838455200195312, | |
| "rewards/mix_margin": 2.183845281600952, | |
| "rewards/real_percentage": 12.125, | |
| "rewards/rejected": -0.8381019830703735, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.956689076074607e-07, | |
| "logits/chosen": -0.47606563568115234, | |
| "logits/rejected": -0.5649515986442566, | |
| "logps/chosen": -359.9063415527344, | |
| "logps/rejected": -272.35333251953125, | |
| "loss": 0.2392, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 1.2465178966522217, | |
| "rewards/confidence": 0.15909627079963684, | |
| "rewards/confidence_mean_diff": -0.15909627079963684, | |
| "rewards/confidence_moving_diff": -0.005671085324138403, | |
| "rewards/margins": 2.0437004566192627, | |
| "rewards/mix_margin": 2.043700695037842, | |
| "rewards/real_percentage": 11.899999618530273, | |
| "rewards/rejected": -0.7971823811531067, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.7318918396427674e-07, | |
| "logits/chosen": -0.5379046201705933, | |
| "logits/rejected": -0.5706161260604858, | |
| "logps/chosen": -386.26861572265625, | |
| "logps/rejected": -303.8609619140625, | |
| "loss": 0.2138, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 1.7834584712982178, | |
| "rewards/confidence": 0.13540206849575043, | |
| "rewards/confidence_mean_diff": -0.13540206849575043, | |
| "rewards/confidence_moving_diff": 0.0014076533261686563, | |
| "rewards/margins": 2.3947689533233643, | |
| "rewards/mix_margin": 2.394768476486206, | |
| "rewards/real_percentage": 11.975000381469727, | |
| "rewards/rejected": -0.6113101840019226, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.513764940330155e-07, | |
| "logits/chosen": -0.39151811599731445, | |
| "logits/rejected": -0.473433256149292, | |
| "logps/chosen": -336.6163024902344, | |
| "logps/rejected": -306.16046142578125, | |
| "loss": 0.2558, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 1.611301064491272, | |
| "rewards/confidence": 0.2504025101661682, | |
| "rewards/confidence_mean_diff": -0.2504025101661682, | |
| "rewards/confidence_moving_diff": 0.000840538355987519, | |
| "rewards/margins": 2.209441661834717, | |
| "rewards/mix_margin": 2.209441661834717, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -0.5981408357620239, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.304202616511362e-07, | |
| "logits/chosen": -0.5112959742546082, | |
| "logits/rejected": -0.5279114842414856, | |
| "logps/chosen": -377.9098815917969, | |
| "logps/rejected": -316.77825927734375, | |
| "loss": 0.1627, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 2.204784631729126, | |
| "rewards/confidence": 0.5538536906242371, | |
| "rewards/confidence_mean_diff": -0.5538536906242371, | |
| "rewards/confidence_moving_diff": -0.005331903696060181, | |
| "rewards/margins": 2.7410061359405518, | |
| "rewards/mix_margin": 2.7410056591033936, | |
| "rewards/real_percentage": 11.774999618530273, | |
| "rewards/rejected": -0.5362212657928467, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.1050247308300944e-07, | |
| "logits/chosen": -0.48956188559532166, | |
| "logits/rejected": -0.5282370448112488, | |
| "logps/chosen": -370.67767333984375, | |
| "logps/rejected": -370.94476318359375, | |
| "loss": 0.1444, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.979029893875122, | |
| "rewards/confidence": 0.7768798470497131, | |
| "rewards/confidence_mean_diff": -0.7768798470497131, | |
| "rewards/confidence_moving_diff": -0.008751118555665016, | |
| "rewards/margins": 5.2986369132995605, | |
| "rewards/mix_margin": 5.298637866973877, | |
| "rewards/real_percentage": 11.875, | |
| "rewards/rejected": -3.3196072578430176, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 9.179609663085594e-08, | |
| "logits/chosen": -0.478290855884552, | |
| "logits/rejected": -0.5842245817184448, | |
| "logps/chosen": -354.32220458984375, | |
| "logps/rejected": -323.82830810546875, | |
| "loss": 0.1632, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 2.003836154937744, | |
| "rewards/confidence": 1.0290786027908325, | |
| "rewards/confidence_mean_diff": -1.0290786027908325, | |
| "rewards/confidence_moving_diff": 0.009490849450230598, | |
| "rewards/margins": 2.7980473041534424, | |
| "rewards/mix_margin": 2.7980475425720215, | |
| "rewards/real_percentage": 12.175000190734863, | |
| "rewards/rejected": -0.7942115068435669, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 7.446358055867688e-08, | |
| "logits/chosen": -0.4719129502773285, | |
| "logits/rejected": -0.5351340174674988, | |
| "logps/chosen": -284.57977294921875, | |
| "logps/rejected": -244.1188507080078, | |
| "loss": 0.1959, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.3237833976745605, | |
| "rewards/confidence": 0.36271917819976807, | |
| "rewards/confidence_mean_diff": -0.36271917819976807, | |
| "rewards/confidence_moving_diff": 0.002586688846349716, | |
| "rewards/margins": 2.3666577339172363, | |
| "rewards/mix_margin": 2.3666574954986572, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -1.0428742170333862, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 5.8655442373371164e-08, | |
| "logits/chosen": -0.581800639629364, | |
| "logits/rejected": -0.6199262142181396, | |
| "logps/chosen": -420.638671875, | |
| "logps/rejected": -355.60736083984375, | |
| "loss": 0.152, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.8940696716308594, | |
| "rewards/confidence": 0.7953528165817261, | |
| "rewards/confidence_mean_diff": -0.7953528165817261, | |
| "rewards/confidence_moving_diff": -0.006861658301204443, | |
| "rewards/margins": 2.8314507007598877, | |
| "rewards/mix_margin": 2.831450939178467, | |
| "rewards/real_percentage": 11.875, | |
| "rewards/rejected": -0.9373809695243835, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 4.450896171388219e-08, | |
| "logits/chosen": -0.5456718802452087, | |
| "logits/rejected": -0.5629149079322815, | |
| "logps/chosen": -384.376953125, | |
| "logps/rejected": -332.3739318847656, | |
| "loss": 0.1365, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 2.1317899227142334, | |
| "rewards/confidence": 0.8724759221076965, | |
| "rewards/confidence_mean_diff": -0.8724759221076965, | |
| "rewards/confidence_moving_diff": 0.006575644016265869, | |
| "rewards/margins": 3.160860300064087, | |
| "rewards/mix_margin": 3.160860061645508, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -1.0290701389312744, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.214698819946879e-08, | |
| "logits/chosen": -0.5238803625106812, | |
| "logits/rejected": -0.5871630907058716, | |
| "logps/chosen": -375.8655090332031, | |
| "logps/rejected": -300.987548828125, | |
| "loss": 0.1748, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.7292721271514893, | |
| "rewards/confidence": 0.5153323411941528, | |
| "rewards/confidence_mean_diff": -0.5153323411941528, | |
| "rewards/confidence_moving_diff": 0.0004902526852674782, | |
| "rewards/margins": 2.5365805625915527, | |
| "rewards/mix_margin": 2.5365803241729736, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": -0.8073086738586426, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 2.1676874589879908e-08, | |
| "logits/chosen": -0.49646130204200745, | |
| "logits/rejected": -0.5325660705566406, | |
| "logps/chosen": -361.79986572265625, | |
| "logps/rejected": -292.0426940917969, | |
| "loss": 0.1878, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.4343092441558838, | |
| "rewards/confidence": 0.5628241300582886, | |
| "rewards/confidence_mean_diff": -0.5628241300582886, | |
| "rewards/confidence_moving_diff": 0.0001227855682373047, | |
| "rewards/margins": 2.628760576248169, | |
| "rewards/mix_margin": 2.628760814666748, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -1.1944514513015747, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 1.3189544521990032e-08, | |
| "logits/chosen": -0.5395928025245667, | |
| "logits/rejected": -0.5778788328170776, | |
| "logps/chosen": -332.5323791503906, | |
| "logps/rejected": -296.8447265625, | |
| "loss": 0.1826, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.4775941371917725, | |
| "rewards/confidence": 0.5103145837783813, | |
| "rewards/confidence_mean_diff": -0.5103145837783813, | |
| "rewards/confidence_moving_diff": -0.003172731725499034, | |
| "rewards/margins": 2.3591160774230957, | |
| "rewards/mix_margin": 2.3591160774230957, | |
| "rewards/real_percentage": 11.875, | |
| "rewards/rejected": -0.8815220594406128, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 6.7587029187732014e-09, | |
| "logits/chosen": -0.5066567659378052, | |
| "logits/rejected": -0.5228812098503113, | |
| "logps/chosen": -346.0731201171875, | |
| "logps/rejected": -309.6691589355469, | |
| "loss": 0.1769, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 1.6007888317108154, | |
| "rewards/confidence": 0.6578723788261414, | |
| "rewards/confidence_mean_diff": -0.6578723788261414, | |
| "rewards/confidence_moving_diff": 0.004312982317060232, | |
| "rewards/margins": 2.8280742168426514, | |
| "rewards/mix_margin": 2.8280739784240723, | |
| "rewards/real_percentage": 12.125, | |
| "rewards/rejected": -1.2272855043411255, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.4401959275140437e-09, | |
| "logits/chosen": -0.4290226399898529, | |
| "logits/rejected": -0.4782096743583679, | |
| "logps/chosen": -323.8050231933594, | |
| "logps/rejected": -286.50225830078125, | |
| "loss": 0.1644, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.5553691387176514, | |
| "rewards/confidence": 0.5026761889457703, | |
| "rewards/confidence_mean_diff": -0.5026761889457703, | |
| "rewards/confidence_moving_diff": -0.0073528410866856575, | |
| "rewards/margins": 2.44854474067688, | |
| "rewards/mix_margin": 2.4485442638397217, | |
| "rewards/real_percentage": 11.899999618530273, | |
| "rewards/rejected": -0.8931753039360046, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.715259456224084e-10, | |
| "logits/chosen": -0.5070622563362122, | |
| "logits/rejected": -0.5159127712249756, | |
| "logps/chosen": -368.29248046875, | |
| "logps/rejected": -357.6094665527344, | |
| "loss": 0.1676, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.714816689491272, | |
| "rewards/confidence": 0.8204771876335144, | |
| "rewards/confidence_mean_diff": -0.8204771876335144, | |
| "rewards/confidence_moving_diff": 0.002654359443113208, | |
| "rewards/margins": 2.7493791580200195, | |
| "rewards/mix_margin": 2.7493796348571777, | |
| "rewards/real_percentage": 12.125, | |
| "rewards/rejected": -1.034562587738037, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 375, | |
| "total_flos": 0.0, | |
| "train_loss": 0.3348727149963379, | |
| "train_runtime": 2536.3599, | |
| "train_samples_per_second": 2.366, | |
| "train_steps_per_second": 0.148 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 375, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |